From 2283a64e571f48a6e21af2018e70fb123ef041e6 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Thu, 28 Oct 2021 18:49:58 +0200 Subject: [PATCH] ipq807x: backport eBPF layer from v5.10 kernel Signed-off-by: John Crispin --- ...x-add-the-Qualcomm-AX-target-support.patch | 44765 +++++++++++++++- 1 file changed, 44704 insertions(+), 61 deletions(-) diff --git a/patches/0017-ipq807x-add-the-Qualcomm-AX-target-support.patch b/patches/0017-ipq807x-add-the-Qualcomm-AX-target-support.patch index f05b7fe05..144b088bd 100644 --- a/patches/0017-ipq807x-add-the-Qualcomm-AX-target-support.patch +++ b/patches/0017-ipq807x-add-the-Qualcomm-AX-target-support.patch @@ -1,70 +1,71 @@ -From 9b33e2fdee38684fbc1eb08446f277823ee113e1 Mon Sep 17 00:00:00 2001 +From 164756923b3e89cb2fc825a80d4cc4236fb6dc89 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Sat, 18 Jul 2020 08:53:44 +0200 Subject: [PATCH 01/30] ipq807x: add the Qualcomm AX target support Signed-off-by: John Crispin --- - config/Config-kernel.in | 9 + - include/image.mk | 6 +- - include/kernel-version.mk | 2 +- - package/boot/uboot-envtools/files/ipq807x | 37 + - .../etc/hotplug.d/firmware/11-ath10k-caldata | 5 + - target/linux/ipq807x/109-logspam.patch | 24 + - target/linux/ipq807x/Makefile | 22 + - .../ipq807x/base-files/etc/board.d/01_leds | 38 + - .../ipq807x/base-files/etc/board.d/02_network | 82 + - .../etc/hotplug.d/firmware/10-ath11k-caldata | 95 ++ - .../ipq807x/base-files/etc/init.d/aq_phy | 16 + - .../ipq807x/base-files/etc/init.d/bootcount | 12 + - .../linux/ipq807x/base-files/etc/init.d/wdt | 14 + - ...G4_v5.4.B-AQR_CIG_WIFI_ID44715_VER1673.cld | Bin 0 -> 391170 bytes - .../base-files/lib/upgrade/platform.sh | 72 + - target/linux/ipq807x/config-4.4 | 828 +++++++++ - .../arm/boot/dts/qcom-ipq6018-cig-wf188.dts | 18 + - .../arm/boot/dts/qcom-ipq6018-cig-wf188n.dts | 18 + - .../boot/dts/qcom-ipq6018-edgecore-eap101.dts | 18 + - .../boot/dts/qcom-ipq6018-miwifi-ax1800.dts | 18 + - .../boot/dts/qcom-ipq6018-wallys-dr6018.dts | 18 + - .../arch/arm/boot/dts/qcom-ipq807x-eap102.dts | 26 + - .../arch/arm/boot/dts/qcom-ipq807x-eap106.dts | 26 + - .../arch/arm/boot/dts/qcom-ipq807x-ex227.dts | 26 + - .../arch/arm/boot/dts/qcom-ipq807x-ex447.dts | 26 + - .../boot/dts/qcom-ipq807x-sercomm-wallaby.dts | 26 + - .../arch/arm/boot/dts/qcom-ipq807x-wf194c.dts | 26 + - .../arm/boot/dts/qcom-ipq807x-wf194c4.dts | 26 + - .../dts/qcom/qcom-ipq6018-miwifi-ax1800.dts | 419 +++++ - .../dts/qcom/qcom-ipq6018-wallys-dr6018.dts | 441 +++++ - .../boot/dts/qcom/qcom-ipq807x-eap102.dts | 918 ++++++++++ - .../boot/dts/qcom/qcom-ipq807x-wf194c4.dts | 942 ++++++++++ - target/linux/ipq807x/image/Makefile | 26 + - target/linux/ipq807x/image/ipq50xx.mk | 10 + - target/linux/ipq807x/image/ipq60xx.mk | 56 + - target/linux/ipq807x/image/ipq807x.mk | 90 + - target/linux/ipq807x/ipq50xx/config-default | 84 + - target/linux/ipq807x/ipq50xx/config-lowmem | 73 + - target/linux/ipq807x/ipq50xx/target.mk | 10 + - target/linux/ipq807x/ipq60xx/config-default | 122 ++ - .../linux/ipq807x/ipq60xx/profiles/default.mk | 9 + - target/linux/ipq807x/ipq60xx/target.mk | 8 + - target/linux/ipq807x/ipq807x/config-default | 78 + - .../linux/ipq807x/ipq807x/profiles/default.mk | 9 + - target/linux/ipq807x/ipq807x/target.mk | 7 + - target/linux/ipq807x/modules.mk | 61 + - .../linux/ipq807x/patches/100-qrtr-ns.patch | 976 +++++++++++ - .../linux/ipq807x/patches/101-squashfs.patch | 16 + - .../linux/ipq807x/patches/102-cig-wf188.patch | 869 ++++++++++ - .../ipq807x/patches/103-sercomm-wallaby.patch 
| 816 +++++++++ - target/linux/ipq807x/patches/104-wf194c.patch | 816 +++++++++ - .../patches/105-fix-dtc-gcc10-build.patch | 11 + - target/linux/ipq807x/patches/106-eap101.patch | 993 +++++++++++ - .../linux/ipq807x/patches/108-log-spam.patch | 37 + - target/linux/ipq807x/patches/109-tplink.patch | 1518 +++++++++++++++++ - .../ipq807x/patches/110-add-esmt-nand.patch | 37 + - target/linux/ipq807x/patches/111-eap106.patch | 765 +++++++++ - target/linux/ipq807x/patches/112-pstore.patch | 147 ++ - toolchain/kernel-headers/Makefile | 8 + - 59 files changed, 11904 insertions(+), 2 deletions(-) + config/Config-kernel.in | 9 + + include/image.mk | 6 +- + include/kernel-version.mk | 2 +- + package/boot/uboot-envtools/files/ipq807x | 37 + + .../etc/hotplug.d/firmware/11-ath10k-caldata | 5 + + target/linux/ipq807x/109-logspam.patch | 24 + + target/linux/ipq807x/Makefile | 22 + + .../ipq807x/base-files/etc/board.d/01_leds | 38 + + .../ipq807x/base-files/etc/board.d/02_network | 82 + + .../etc/hotplug.d/firmware/10-ath11k-caldata | 95 + + .../ipq807x/base-files/etc/init.d/aq_phy | 16 + + .../ipq807x/base-files/etc/init.d/bootcount | 12 + + .../linux/ipq807x/base-files/etc/init.d/wdt | 14 + + ...G4_v5.4.B-AQR_CIG_WIFI_ID44715_VER1673.cld | Bin 0 -> 391170 bytes + .../base-files/lib/upgrade/platform.sh | 72 + + target/linux/ipq807x/config-4.4 | 828 + + .../arm/boot/dts/qcom-ipq6018-cig-wf188.dts | 18 + + .../arm/boot/dts/qcom-ipq6018-cig-wf188n.dts | 18 + + .../boot/dts/qcom-ipq6018-edgecore-eap101.dts | 18 + + .../boot/dts/qcom-ipq6018-miwifi-ax1800.dts | 18 + + .../boot/dts/qcom-ipq6018-wallys-dr6018.dts | 18 + + .../arch/arm/boot/dts/qcom-ipq807x-eap102.dts | 26 + + .../arch/arm/boot/dts/qcom-ipq807x-eap106.dts | 26 + + .../arch/arm/boot/dts/qcom-ipq807x-ex227.dts | 26 + + .../arch/arm/boot/dts/qcom-ipq807x-ex447.dts | 26 + + .../boot/dts/qcom-ipq807x-sercomm-wallaby.dts | 26 + + .../arch/arm/boot/dts/qcom-ipq807x-wf194c.dts | 26 + + .../arm/boot/dts/qcom-ipq807x-wf194c4.dts | 26 + + .../dts/qcom/qcom-ipq6018-miwifi-ax1800.dts | 419 + + .../dts/qcom/qcom-ipq6018-wallys-dr6018.dts | 441 + + .../boot/dts/qcom/qcom-ipq807x-eap102.dts | 918 + + .../boot/dts/qcom/qcom-ipq807x-wf194c4.dts | 942 + + target/linux/ipq807x/image/Makefile | 26 + + target/linux/ipq807x/image/ipq50xx.mk | 10 + + target/linux/ipq807x/image/ipq60xx.mk | 56 + + target/linux/ipq807x/image/ipq807x.mk | 90 + + target/linux/ipq807x/ipq50xx/config-default | 84 + + target/linux/ipq807x/ipq50xx/config-lowmem | 73 + + target/linux/ipq807x/ipq50xx/target.mk | 10 + + target/linux/ipq807x/ipq60xx/config-default | 122 + + .../linux/ipq807x/ipq60xx/profiles/default.mk | 9 + + target/linux/ipq807x/ipq60xx/target.mk | 8 + + target/linux/ipq807x/ipq807x/config-default | 78 + + .../linux/ipq807x/ipq807x/profiles/default.mk | 9 + + target/linux/ipq807x/ipq807x/target.mk | 7 + + target/linux/ipq807x/modules.mk | 61 + + .../linux/ipq807x/patches/100-qrtr-ns.patch | 976 + + .../linux/ipq807x/patches/101-squashfs.patch | 16 + + .../linux/ipq807x/patches/102-cig-wf188.patch | 869 + + .../ipq807x/patches/103-sercomm-wallaby.patch | 816 + + target/linux/ipq807x/patches/104-wf194c.patch | 816 + + .../patches/105-fix-dtc-gcc10-build.patch | 11 + + target/linux/ipq807x/patches/106-eap101.patch | 993 + + .../linux/ipq807x/patches/108-log-spam.patch | 37 + + target/linux/ipq807x/patches/109-tplink.patch | 1518 + + .../ipq807x/patches/110-add-esmt-nand.patch | 37 + + target/linux/ipq807x/patches/111-eap106.patch | 765 + + 
target/linux/ipq807x/patches/112-pstore.patch | 147 + + .../ipq807x/patches/200-bpf_backport.patch | 44635 ++++++++++++++++ + toolchain/kernel-headers/Makefile | 8 + + 60 files changed, 56539 insertions(+), 2 deletions(-) create mode 100644 package/boot/uboot-envtools/files/ipq807x create mode 100644 target/linux/ipq807x/109-logspam.patch create mode 100644 target/linux/ipq807x/Makefile @@ -119,6 +120,7 @@ Signed-off-by: John Crispin create mode 100644 target/linux/ipq807x/patches/110-add-esmt-nand.patch create mode 100644 target/linux/ipq807x/patches/111-eap106.patch create mode 100644 target/linux/ipq807x/patches/112-pstore.patch + create mode 100644 target/linux/ipq807x/patches/200-bpf_backport.patch diff --git a/config/Config-kernel.in b/config/Config-kernel.in index f71114b5da..4a85d83118 100644 @@ -16566,6 +16568,44647 @@ index 0000000000..dc3960306d + #ifdef CONFIG_OF_ADDRESS + /* + * The following routines scan a subtree and registers a device for +diff --git a/target/linux/ipq807x/patches/200-bpf_backport.patch b/target/linux/ipq807x/patches/200-bpf_backport.patch +new file mode 100644 +index 0000000000..3e730c313e +--- /dev/null ++++ b/target/linux/ipq807x/patches/200-bpf_backport.patch +@@ -0,0 +1,44635 @@ ++--- a/arch/arm/Kconfig +++++ b/arch/arm/Kconfig ++@@ -38,7 +38,7 @@ config ARM ++ select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 ++ select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) ++ select HAVE_ARCH_TRACEHOOK ++- select HAVE_BPF_JIT +++ select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32 ++ select HAVE_CC_STACKPROTECTOR ++ select HAVE_CONTEXT_TRACKING ++ select HAVE_C_RECORDMCOUNT ++--- a/arch/arm/net/bpf_jit_32.c +++++ b/arch/arm/net/bpf_jit_32.c ++@@ -1,13 +1,12 @@ +++// SPDX-License-Identifier: GPL-2.0-only ++ /* ++- * Just-In-Time compiler for BPF filters on 32bit ARM +++ * Just-In-Time compiler for eBPF filters on 32bit ARM ++ * +++ * Copyright (c) 2017 Shubham Bansal ++ * Copyright (c) 2011 Mircea Gherzan ++- * ++- * This program is free software; you can redistribute it and/or modify it ++- * under the terms of the GNU General Public License as published by the ++- * Free Software Foundation; version 2 of the License. ++ */ ++ +++#include ++ #include ++ #include ++ #include ++@@ -20,51 +19,182 @@ ++ #include ++ #include ++ #include +++#include ++ ++ #include "bpf_jit_32.h" ++ ++ /* ++- * ABI: +++ * eBPF prog stack layout: +++ * +++ * high +++ * original ARM_SP => +-----+ +++ * | | callee saved registers +++ * +-----+ <= (BPF_FP + SCRATCH_SIZE) +++ * | ... | eBPF JIT scratch space +++ * eBPF fp register => +-----+ +++ * (BPF_FP) | ... | eBPF prog stack +++ * +-----+ +++ * |RSVD | JIT scratchpad +++ * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE) +++ * | | +++ * | ... | Function call stack +++ * | | +++ * +-----+ +++ * low +++ * +++ * The callee saved registers depends on whether frame pointers are enabled. 
+++ * With frame pointers (to be compliant with the ABI): +++ * +++ * high +++ * original ARM_SP => +--------------+ \ +++ * | pc | | +++ * current ARM_FP => +--------------+ } callee saved registers +++ * |r4-r9,fp,ip,lr| | +++ * +--------------+ / +++ * low ++ * ++- * r0 scratch register ++- * r4 BPF register A ++- * r5 BPF register X ++- * r6 pointer to the skb ++- * r7 skb->data ++- * r8 skb_headlen(skb) +++ * Without frame pointers: +++ * +++ * high +++ * original ARM_SP => +--------------+ +++ * | r4-r9,fp,lr | callee saved registers +++ * current ARM_FP => +--------------+ +++ * low +++ * +++ * When popping registers off the stack at the end of a BPF function, we +++ * reference them via the current ARM_FP register. ++ */ +++#define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \ +++ 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R9 | \ +++ 1 << ARM_FP) +++#define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR) +++#define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC) +++ +++enum { +++ /* Stack layout - these are offsets from (top of stack - 4) */ +++ BPF_R2_HI, +++ BPF_R2_LO, +++ BPF_R3_HI, +++ BPF_R3_LO, +++ BPF_R4_HI, +++ BPF_R4_LO, +++ BPF_R5_HI, +++ BPF_R5_LO, +++ BPF_R7_HI, +++ BPF_R7_LO, +++ BPF_R8_HI, +++ BPF_R8_LO, +++ BPF_R9_HI, +++ BPF_R9_LO, +++ BPF_FP_HI, +++ BPF_FP_LO, +++ BPF_TC_HI, +++ BPF_TC_LO, +++ BPF_AX_HI, +++ BPF_AX_LO, +++ /* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4, +++ * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9, +++ * BPF_REG_FP and Tail call counts. +++ */ +++ BPF_JIT_SCRATCH_REGS, +++}; +++ +++/* +++ * Negative "register" values indicate the register is stored on the stack +++ * and are the offset from the top of the eBPF JIT scratch space. +++ */ +++#define STACK_OFFSET(k) (-4 - (k) * 4) +++#define SCRATCH_SIZE (BPF_JIT_SCRATCH_REGS * 4) +++ +++#ifdef CONFIG_FRAME_POINTER +++#define EBPF_SCRATCH_TO_ARM_FP(x) ((x) - 4 * hweight16(CALLEE_PUSH_MASK) - 4) +++#else +++#define EBPF_SCRATCH_TO_ARM_FP(x) (x) +++#endif ++ ++-#define r_scratch ARM_R0 ++-/* r1-r3 are (also) used for the unaligned loads on the non-ARMv7 slowpath */ ++-#define r_off ARM_R1 ++-#define r_A ARM_R4 ++-#define r_X ARM_R5 ++-#define r_skb ARM_R6 ++-#define r_skb_data ARM_R7 ++-#define r_skb_hl ARM_R8 ++- ++-#define SCRATCH_SP_OFFSET 0 ++-#define SCRATCH_OFF(k) (SCRATCH_SP_OFFSET + 4 * (k)) ++- ++-#define SEEN_MEM ((1 << BPF_MEMWORDS) - 1) ++-#define SEEN_MEM_WORD(k) (1 << (k)) ++-#define SEEN_X (1 << BPF_MEMWORDS) ++-#define SEEN_CALL (1 << (BPF_MEMWORDS + 1)) ++-#define SEEN_SKB (1 << (BPF_MEMWORDS + 2)) ++-#define SEEN_DATA (1 << (BPF_MEMWORDS + 3)) +++#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */ +++#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */ +++#define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */ ++ ++-#define FLAG_NEED_X_RESET (1 << 0) ++-#define FLAG_IMM_OVERFLOW (1 << 1) +++#define FLAG_IMM_OVERFLOW (1 << 0) +++ +++/* +++ * Map eBPF registers to ARM 32bit registers or stack scratch space. +++ * +++ * 1. First argument is passed using the arm 32bit registers and rest of the +++ * arguments are passed on stack scratch space. +++ * 2. First callee-saved argument is mapped to arm 32 bit registers and rest +++ * arguments are mapped to scratch space on stack. +++ * 3. We need two 64 bit temp registers to do complex operations on eBPF +++ * registers. 
+++ * +++ * As the eBPF registers are all 64 bit registers and arm has only 32 bit +++ * registers, we have to map each eBPF registers with two arm 32 bit regs or +++ * scratch memory space and we have to build eBPF 64 bit register from those. +++ * +++ */ +++static const s8 bpf2a32[][2] = { +++ /* return value from in-kernel function, and exit value from eBPF */ +++ [BPF_REG_0] = {ARM_R1, ARM_R0}, +++ /* arguments from eBPF program to in-kernel function */ +++ [BPF_REG_1] = {ARM_R3, ARM_R2}, +++ /* Stored on stack scratch space */ +++ [BPF_REG_2] = {STACK_OFFSET(BPF_R2_HI), STACK_OFFSET(BPF_R2_LO)}, +++ [BPF_REG_3] = {STACK_OFFSET(BPF_R3_HI), STACK_OFFSET(BPF_R3_LO)}, +++ [BPF_REG_4] = {STACK_OFFSET(BPF_R4_HI), STACK_OFFSET(BPF_R4_LO)}, +++ [BPF_REG_5] = {STACK_OFFSET(BPF_R5_HI), STACK_OFFSET(BPF_R5_LO)}, +++ /* callee saved registers that in-kernel function will preserve */ +++ [BPF_REG_6] = {ARM_R5, ARM_R4}, +++ /* Stored on stack scratch space */ +++ [BPF_REG_7] = {STACK_OFFSET(BPF_R7_HI), STACK_OFFSET(BPF_R7_LO)}, +++ [BPF_REG_8] = {STACK_OFFSET(BPF_R8_HI), STACK_OFFSET(BPF_R8_LO)}, +++ [BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)}, +++ /* Read only Frame Pointer to access Stack */ +++ [BPF_REG_FP] = {STACK_OFFSET(BPF_FP_HI), STACK_OFFSET(BPF_FP_LO)}, +++ /* Temporary Register for internal BPF JIT, can be used +++ * for constant blindings and others. +++ */ +++ [TMP_REG_1] = {ARM_R7, ARM_R6}, +++ [TMP_REG_2] = {ARM_R9, ARM_R8}, +++ /* Tail call count. Stored on stack scratch space. */ +++ [TCALL_CNT] = {STACK_OFFSET(BPF_TC_HI), STACK_OFFSET(BPF_TC_LO)}, +++ /* temporary register for blinding constants. +++ * Stored on stack scratch space. +++ */ +++ [BPF_REG_AX] = {STACK_OFFSET(BPF_AX_HI), STACK_OFFSET(BPF_AX_LO)}, +++}; +++ +++#define dst_lo dst[1] +++#define dst_hi dst[0] +++#define src_lo src[1] +++#define src_hi src[0] +++ +++/* +++ * JIT Context: +++ * +++ * prog : bpf_prog +++ * idx : index of current last JITed instruction. +++ * prologue_bytes : bytes used in prologue. +++ * epilogue_offset : offset of epilogue starting. +++ * offsets : array of eBPF instruction offsets in +++ * JITed code. +++ * target : final JITed code. +++ * epilogue_bytes : no of bytes used in epilogue. +++ * imm_count : no of immediate counts used for global +++ * variables. +++ * imms : array of global variable addresses. 
+++ */ ++ ++ struct jit_ctx { ++- const struct bpf_prog *skf; ++- unsigned idx; ++- unsigned prologue_bytes; ++- int ret0_fp_idx; ++- u32 seen; +++ const struct bpf_prog *prog; +++ unsigned int idx; +++ unsigned int prologue_bytes; +++ unsigned int epilogue_offset; +++ unsigned int cpu_architecture; ++ u32 flags; ++ u32 *offsets; ++ u32 *target; +++ u32 stack_size; ++ #if __LINUX_ARM_ARCH__ < 7 ++ u16 epilogue_bytes; ++ u16 imm_count; ++@@ -72,68 +202,16 @@ struct jit_ctx { ++ #endif ++ }; ++ ++-int bpf_jit_enable __read_mostly; ++- ++-static inline int call_neg_helper(struct sk_buff *skb, int offset, void *ret, ++- unsigned int size) ++-{ ++- void *ptr = bpf_internal_load_pointer_neg_helper(skb, offset, size); ++- ++- if (!ptr) ++- return -EFAULT; ++- memcpy(ret, ptr, size); ++- return 0; ++-} ++- ++-static u64 jit_get_skb_b(struct sk_buff *skb, int offset) ++-{ ++- u8 ret; ++- int err; ++- ++- if (offset < 0) ++- err = call_neg_helper(skb, offset, &ret, 1); ++- else ++- err = skb_copy_bits(skb, offset, &ret, 1); ++- ++- return (u64)err << 32 | ret; ++-} ++- ++-static u64 jit_get_skb_h(struct sk_buff *skb, int offset) ++-{ ++- u16 ret; ++- int err; ++- ++- if (offset < 0) ++- err = call_neg_helper(skb, offset, &ret, 2); ++- else ++- err = skb_copy_bits(skb, offset, &ret, 2); ++- ++- return (u64)err << 32 | ntohs(ret); ++-} ++- ++-static u64 jit_get_skb_w(struct sk_buff *skb, int offset) ++-{ ++- u32 ret; ++- int err; ++- ++- if (offset < 0) ++- err = call_neg_helper(skb, offset, &ret, 4); ++- else ++- err = skb_copy_bits(skb, offset, &ret, 4); ++- ++- return (u64)err << 32 | ntohl(ret); ++-} ++- ++ /* ++ * Wrappers which handle both OABI and EABI and assures Thumb2 interworking ++ * (where the assembly routines like __aeabi_uidiv could cause problems). ++ */ ++-static u32 jit_udiv(u32 dividend, u32 divisor) +++static u32 jit_udiv32(u32 dividend, u32 divisor) ++ { ++ return dividend / divisor; ++ } ++ ++-static u32 jit_mod(u32 dividend, u32 divisor) +++static u32 jit_mod32(u32 dividend, u32 divisor) ++ { ++ return dividend % divisor; ++ } ++@@ -157,36 +235,100 @@ static inline void emit(u32 inst, struct ++ _emit(ARM_COND_AL, inst, ctx); ++ } ++ ++-static u16 saved_regs(struct jit_ctx *ctx) +++/* +++ * This is rather horrid, but necessary to convert an integer constant +++ * to an immediate operand for the opcodes, and be able to detect at +++ * build time whether the constant can't be converted (iow, usable in +++ * BUILD_BUG_ON()). 
+++ */ +++#define imm12val(v, s) (rol32(v, (s)) | (s) << 7) +++#define const_imm8m(x) \ +++ ({ int r; \ +++ u32 v = (x); \ +++ if (!(v & ~0x000000ff)) \ +++ r = imm12val(v, 0); \ +++ else if (!(v & ~0xc000003f)) \ +++ r = imm12val(v, 2); \ +++ else if (!(v & ~0xf000000f)) \ +++ r = imm12val(v, 4); \ +++ else if (!(v & ~0xfc000003)) \ +++ r = imm12val(v, 6); \ +++ else if (!(v & ~0xff000000)) \ +++ r = imm12val(v, 8); \ +++ else if (!(v & ~0x3fc00000)) \ +++ r = imm12val(v, 10); \ +++ else if (!(v & ~0x0ff00000)) \ +++ r = imm12val(v, 12); \ +++ else if (!(v & ~0x03fc0000)) \ +++ r = imm12val(v, 14); \ +++ else if (!(v & ~0x00ff0000)) \ +++ r = imm12val(v, 16); \ +++ else if (!(v & ~0x003fc000)) \ +++ r = imm12val(v, 18); \ +++ else if (!(v & ~0x000ff000)) \ +++ r = imm12val(v, 20); \ +++ else if (!(v & ~0x0003fc00)) \ +++ r = imm12val(v, 22); \ +++ else if (!(v & ~0x0000ff00)) \ +++ r = imm12val(v, 24); \ +++ else if (!(v & ~0x00003fc0)) \ +++ r = imm12val(v, 26); \ +++ else if (!(v & ~0x00000ff0)) \ +++ r = imm12val(v, 28); \ +++ else if (!(v & ~0x000003fc)) \ +++ r = imm12val(v, 30); \ +++ else \ +++ r = -1; \ +++ r; }) +++ +++/* +++ * Checks if immediate value can be converted to imm12(12 bits) value. +++ */ +++static int imm8m(u32 x) ++ { ++- u16 ret = 0; +++ u32 rot; ++ ++- if ((ctx->skf->len > 1) || ++- (ctx->skf->insns[0].code == (BPF_RET | BPF_A))) ++- ret |= 1 << r_A; +++ for (rot = 0; rot < 16; rot++) +++ if ((x & ~ror32(0xff, 2 * rot)) == 0) +++ return rol32(x, 2 * rot) | (rot << 8); +++ return -1; +++} ++ ++-#ifdef CONFIG_FRAME_POINTER ++- ret |= (1 << ARM_FP) | (1 << ARM_IP) | (1 << ARM_LR) | (1 << ARM_PC); ++-#else ++- if (ctx->seen & SEEN_CALL) ++- ret |= 1 << ARM_LR; ++-#endif ++- if (ctx->seen & (SEEN_DATA | SEEN_SKB)) ++- ret |= 1 << r_skb; ++- if (ctx->seen & SEEN_DATA) ++- ret |= (1 << r_skb_data) | (1 << r_skb_hl); ++- if (ctx->seen & SEEN_X) ++- ret |= 1 << r_X; +++#define imm8m(x) (__builtin_constant_p(x) ? const_imm8m(x) : imm8m(x)) ++ ++- return ret; +++static u32 arm_bpf_ldst_imm12(u32 op, u8 rt, u8 rn, s16 imm12) +++{ +++ op |= rt << 12 | rn << 16; +++ if (imm12 >= 0) +++ op |= ARM_INST_LDST__U; +++ else +++ imm12 = -imm12; +++ return op | (imm12 & ARM_INST_LDST__IMM12); ++ } ++ ++-static inline int mem_words_used(struct jit_ctx *ctx) +++static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8) ++ { ++- /* yes, we do waste some stack space IF there are "holes" in the set" */ ++- return fls(ctx->seen & SEEN_MEM); +++ op |= rt << 12 | rn << 16; +++ if (imm8 >= 0) +++ op |= ARM_INST_LDST__U; +++ else +++ imm8 = -imm8; +++ return op | (imm8 & 0xf0) << 4 | (imm8 & 0x0f); ++ } ++ +++#define ARM_LDR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_LDR_I, rt, rn, off) +++#define ARM_LDRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_LDRB_I, rt, rn, off) +++#define ARM_LDRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off) +++#define ARM_LDRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off) +++ +++#define ARM_STR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off) +++#define ARM_STRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off) +++#define ARM_STRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off) +++#define ARM_STRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRH_I, rt, rn, off) +++ +++/* +++ * Initializes the JIT space with undefined instructions. 
+++ */ ++ static void jit_fill_hole(void *area, unsigned int size) ++ { ++ u32 *ptr; ++@@ -195,88 +337,23 @@ static void jit_fill_hole(void *area, un ++ *ptr++ = __opcode_to_mem_arm(ARM_INST_UDF); ++ } ++ ++-static void build_prologue(struct jit_ctx *ctx) ++-{ ++- u16 reg_set = saved_regs(ctx); ++- u16 off; ++- ++-#ifdef CONFIG_FRAME_POINTER ++- emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx); ++- emit(ARM_PUSH(reg_set), ctx); ++- emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx); +++#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5) +++/* EABI requires the stack to be aligned to 64-bit boundaries */ +++#define STACK_ALIGNMENT 8 ++ #else ++- if (reg_set) ++- emit(ARM_PUSH(reg_set), ctx); +++/* Stack must be aligned to 32-bit boundaries */ +++#define STACK_ALIGNMENT 4 ++ #endif ++ ++- if (ctx->seen & (SEEN_DATA | SEEN_SKB)) ++- emit(ARM_MOV_R(r_skb, ARM_R0), ctx); ++- ++- if (ctx->seen & SEEN_DATA) { ++- off = offsetof(struct sk_buff, data); ++- emit(ARM_LDR_I(r_skb_data, r_skb, off), ctx); ++- /* headlen = len - data_len */ ++- off = offsetof(struct sk_buff, len); ++- emit(ARM_LDR_I(r_skb_hl, r_skb, off), ctx); ++- off = offsetof(struct sk_buff, data_len); ++- emit(ARM_LDR_I(r_scratch, r_skb, off), ctx); ++- emit(ARM_SUB_R(r_skb_hl, r_skb_hl, r_scratch), ctx); ++- } ++- ++- if (ctx->flags & FLAG_NEED_X_RESET) ++- emit(ARM_MOV_I(r_X, 0), ctx); ++- ++- /* do not leak kernel data to userspace */ ++- if (bpf_needs_clear_a(&ctx->skf->insns[0])) ++- emit(ARM_MOV_I(r_A, 0), ctx); ++- ++- /* stack space for the BPF_MEM words */ ++- if (ctx->seen & SEEN_MEM) ++- emit(ARM_SUB_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx); ++-} ++- ++-static void build_epilogue(struct jit_ctx *ctx) ++-{ ++- u16 reg_set = saved_regs(ctx); ++- ++- if (ctx->seen & SEEN_MEM) ++- emit(ARM_ADD_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx); ++- ++- reg_set &= ~(1 << ARM_LR); ++- ++-#ifdef CONFIG_FRAME_POINTER ++- /* the first instruction of the prologue was: mov ip, sp */ ++- reg_set &= ~(1 << ARM_IP); ++- reg_set |= (1 << ARM_SP); ++- emit(ARM_LDM(ARM_SP, reg_set), ctx); ++-#else ++- if (reg_set) { ++- if (ctx->seen & SEEN_CALL) ++- reg_set |= 1 << ARM_PC; ++- emit(ARM_POP(reg_set), ctx); ++- } ++- ++- if (!(ctx->seen & SEEN_CALL)) ++- emit(ARM_BX(ARM_LR), ctx); ++-#endif ++-} ++- ++-static int16_t imm8m(u32 x) ++-{ ++- u32 rot; ++- ++- for (rot = 0; rot < 16; rot++) ++- if ((x & ~ror32(0xff, 2 * rot)) == 0) ++- return rol32(x, 2 * rot) | (rot << 8); ++- ++- return -1; ++-} +++/* total stack size used in JITed code */ +++#define _STACK_SIZE (ctx->prog->aux->stack_depth + SCRATCH_SIZE) +++#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT) ++ ++ #if __LINUX_ARM_ARCH__ < 7 ++ ++ static u16 imm_offset(u32 k, struct jit_ctx *ctx) ++ { ++- unsigned i = 0, offset; +++ unsigned int i = 0, offset; ++ u16 imm; ++ ++ /* on the "fake" run we just count them (duplicates included) */ ++@@ -295,7 +372,7 @@ static u16 imm_offset(u32 k, struct jit_ ++ ctx->imms[i] = k; ++ ++ /* constants go just after the epilogue */ ++- offset = ctx->offsets[ctx->skf->len]; +++ offset = ctx->offsets[ctx->prog->len - 1] * 4; ++ offset += ctx->prologue_bytes; ++ offset += ctx->epilogue_bytes; ++ offset += i * 4; ++@@ -319,10 +396,22 @@ static u16 imm_offset(u32 k, struct jit_ ++ ++ #endif /* __LINUX_ARM_ARCH__ */ ++ +++static inline int bpf2a32_offset(int bpf_to, int bpf_from, +++ const struct jit_ctx *ctx) { +++ int to, from; +++ +++ if (ctx->target == NULL) +++ return 0; +++ to = ctx->offsets[bpf_to]; +++ from = ctx->offsets[bpf_from]; +++ +++ return to - from - 
1; +++} +++ ++ /* ++ * Move an immediate that's not an imm8m to a core register. ++ */ ++-static inline void emit_mov_i_no8m(int rd, u32 val, struct jit_ctx *ctx) +++static inline void emit_mov_i_no8m(const u8 rd, u32 val, struct jit_ctx *ctx) ++ { ++ #if __LINUX_ARM_ARCH__ < 7 ++ emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx); ++@@ -333,7 +422,7 @@ static inline void emit_mov_i_no8m(int r ++ #endif ++ } ++ ++-static inline void emit_mov_i(int rd, u32 val, struct jit_ctx *ctx) +++static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx) ++ { ++ int imm12 = imm8m(val); ++ ++@@ -343,676 +432,1508 @@ static inline void emit_mov_i(int rd, u3 ++ emit_mov_i_no8m(rd, val, ctx); ++ } ++ ++-#if __LINUX_ARM_ARCH__ < 6 +++static void emit_bx_r(u8 tgt_reg, struct jit_ctx *ctx) +++{ +++ if (elf_hwcap & HWCAP_THUMB) +++ emit(ARM_BX(tgt_reg), ctx); +++ else +++ emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx); +++} ++ ++-static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) +++static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) ++ { ++- _emit(cond, ARM_LDRB_I(ARM_R3, r_addr, 1), ctx); ++- _emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx); ++- _emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 3), ctx); ++- _emit(cond, ARM_LSL_I(ARM_R3, ARM_R3, 16), ctx); ++- _emit(cond, ARM_LDRB_I(ARM_R0, r_addr, 2), ctx); ++- _emit(cond, ARM_ORR_S(ARM_R3, ARM_R3, ARM_R1, SRTYPE_LSL, 24), ctx); ++- _emit(cond, ARM_ORR_R(ARM_R3, ARM_R3, ARM_R2), ctx); ++- _emit(cond, ARM_ORR_S(r_res, ARM_R3, ARM_R0, SRTYPE_LSL, 8), ctx); +++#if __LINUX_ARM_ARCH__ < 5 +++ emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); +++ emit_bx_r(tgt_reg, ctx); +++#else +++ emit(ARM_BLX_R(tgt_reg), ctx); +++#endif ++ } ++ ++-static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) +++static inline int epilogue_offset(const struct jit_ctx *ctx) ++ { ++- _emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx); ++- _emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 1), ctx); ++- _emit(cond, ARM_ORR_S(r_res, ARM_R2, ARM_R1, SRTYPE_LSL, 8), ctx); +++ int to, from; +++ /* No need for 1st dummy run */ +++ if (ctx->target == NULL) +++ return 0; +++ to = ctx->epilogue_offset; +++ from = ctx->idx; +++ +++ return to - from - 2; ++ } ++ ++-static inline void emit_swap16(u8 r_dst, u8 r_src, struct jit_ctx *ctx) +++static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) ++ { ++- /* r_dst = (r_src << 8) | (r_src >> 8) */ ++- emit(ARM_LSL_I(ARM_R1, r_src, 8), ctx); ++- emit(ARM_ORR_S(r_dst, ARM_R1, r_src, SRTYPE_LSR, 8), ctx); +++ const s8 *tmp = bpf2a32[TMP_REG_1]; +++ +++#if __LINUX_ARM_ARCH__ == 7 +++ if (elf_hwcap & HWCAP_IDIVA) { +++ if (op == BPF_DIV) +++ emit(ARM_UDIV(rd, rm, rn), ctx); +++ else { +++ emit(ARM_UDIV(ARM_IP, rm, rn), ctx); +++ emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx); +++ } +++ return; +++ } +++#endif ++ ++ /* ++- * we need to mask out the bits set in r_dst[23:16] due to ++- * the first shift instruction. ++- * ++- * note that 0x8ff is the encoded immediate 0x00ff0000. +++ * For BPF_ALU | BPF_DIV | BPF_K instructions +++ * As ARM_R1 and ARM_R0 contains 1st argument of bpf +++ * function, we need to save it on caller side to save +++ * it from getting destroyed within callee. +++ * After the return from the callee, we restore ARM_R0 +++ * ARM_R1. 
++ */ ++- emit(ARM_BIC_I(r_dst, r_dst, 0x8ff), ctx); +++ if (rn != ARM_R1) { +++ emit(ARM_MOV_R(tmp[0], ARM_R1), ctx); +++ emit(ARM_MOV_R(ARM_R1, rn), ctx); +++ } +++ if (rm != ARM_R0) { +++ emit(ARM_MOV_R(tmp[1], ARM_R0), ctx); +++ emit(ARM_MOV_R(ARM_R0, rm), ctx); +++ } +++ +++ /* Call appropriate function */ +++ emit_mov_i(ARM_IP, op == BPF_DIV ? +++ (u32)jit_udiv32 : (u32)jit_mod32, ctx); +++ emit_blx_r(ARM_IP, ctx); +++ +++ /* Save return value */ +++ if (rd != ARM_R0) +++ emit(ARM_MOV_R(rd, ARM_R0), ctx); +++ +++ /* Restore ARM_R0 and ARM_R1 */ +++ if (rn != ARM_R1) +++ emit(ARM_MOV_R(ARM_R1, tmp[0]), ctx); +++ if (rm != ARM_R0) +++ emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx); ++ } ++ ++-#else /* ARMv6+ */ +++/* Is the translated BPF register on stack? */ +++static bool is_stacked(s8 reg) +++{ +++ return reg < 0; +++} ++ ++-static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) +++/* If a BPF register is on the stack (stk is true), load it to the +++ * supplied temporary register and return the temporary register +++ * for subsequent operations, otherwise just use the CPU register. +++ */ +++static s8 arm_bpf_get_reg32(s8 reg, s8 tmp, struct jit_ctx *ctx) ++ { ++- _emit(cond, ARM_LDR_I(r_res, r_addr, 0), ctx); ++-#ifdef __LITTLE_ENDIAN ++- _emit(cond, ARM_REV(r_res, r_res), ctx); ++-#endif +++ if (is_stacked(reg)) { +++ emit(ARM_LDR_I(tmp, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg)), ctx); +++ reg = tmp; +++ } +++ return reg; ++ } ++ ++-static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) +++static const s8 *arm_bpf_get_reg64(const s8 *reg, const s8 *tmp, +++ struct jit_ctx *ctx) ++ { ++- _emit(cond, ARM_LDRH_I(r_res, r_addr, 0), ctx); ++-#ifdef __LITTLE_ENDIAN ++- _emit(cond, ARM_REV16(r_res, r_res), ctx); ++-#endif +++ if (is_stacked(reg[1])) { +++ if (__LINUX_ARM_ARCH__ >= 6 || +++ ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) { +++ emit(ARM_LDRD_I(tmp[1], ARM_FP, +++ EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); +++ } else { +++ emit(ARM_LDR_I(tmp[1], ARM_FP, +++ EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); +++ emit(ARM_LDR_I(tmp[0], ARM_FP, +++ EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx); +++ } +++ reg = tmp; +++ } +++ return reg; ++ } ++ ++-static inline void emit_swap16(u8 r_dst __maybe_unused, ++- u8 r_src __maybe_unused, ++- struct jit_ctx *ctx __maybe_unused) +++/* If a BPF register is on the stack (stk is true), save the register +++ * back to the stack. If the source register is not the same, then +++ * move it into the correct register. 
+++ */ +++static void arm_bpf_put_reg32(s8 reg, s8 src, struct jit_ctx *ctx) ++ { ++-#ifdef __LITTLE_ENDIAN ++- emit(ARM_REV16(r_dst, r_src), ctx); ++-#endif +++ if (is_stacked(reg)) +++ emit(ARM_STR_I(src, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg)), ctx); +++ else if (reg != src) +++ emit(ARM_MOV_R(reg, src), ctx); +++} +++ +++static void arm_bpf_put_reg64(const s8 *reg, const s8 *src, +++ struct jit_ctx *ctx) +++{ +++ if (is_stacked(reg[1])) { +++ if (__LINUX_ARM_ARCH__ >= 6 || +++ ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) { +++ emit(ARM_STRD_I(src[1], ARM_FP, +++ EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); +++ } else { +++ emit(ARM_STR_I(src[1], ARM_FP, +++ EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); +++ emit(ARM_STR_I(src[0], ARM_FP, +++ EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx); +++ } +++ } else { +++ if (reg[1] != src[1]) +++ emit(ARM_MOV_R(reg[1], src[1]), ctx); +++ if (reg[0] != src[0]) +++ emit(ARM_MOV_R(reg[0], src[0]), ctx); +++ } ++ } ++ ++-#endif /* __LINUX_ARM_ARCH__ < 6 */ +++static inline void emit_a32_mov_i(const s8 dst, const u32 val, +++ struct jit_ctx *ctx) +++{ +++ const s8 *tmp = bpf2a32[TMP_REG_1]; ++ +++ if (is_stacked(dst)) { +++ emit_mov_i(tmp[1], val, ctx); +++ arm_bpf_put_reg32(dst, tmp[1], ctx); +++ } else { +++ emit_mov_i(dst, val, ctx); +++ } +++} ++ ++-/* Compute the immediate value for a PC-relative branch. */ ++-static inline u32 b_imm(unsigned tgt, struct jit_ctx *ctx) +++static void emit_a32_mov_i64(const s8 dst[], u64 val, struct jit_ctx *ctx) ++ { ++- u32 imm; +++ const s8 *tmp = bpf2a32[TMP_REG_1]; +++ const s8 *rd = is_stacked(dst_lo) ? tmp : dst; ++ ++- if (ctx->target == NULL) ++- return 0; ++- /* ++- * BPF allows only forward jumps and the offset of the target is ++- * still the one computed during the first pass. +++ emit_mov_i(rd[1], (u32)val, ctx); +++ emit_mov_i(rd[0], val >> 32, ctx); +++ +++ arm_bpf_put_reg64(dst, rd, ctx); +++} +++ +++/* Sign extended move */ +++static inline void emit_a32_mov_se_i64(const bool is64, const s8 dst[], +++ const u32 val, struct jit_ctx *ctx) { +++ u64 val64 = val; +++ +++ if (is64 && (val & (1<<31))) +++ val64 |= 0xffffffff00000000ULL; +++ emit_a32_mov_i64(dst, val64, ctx); +++} +++ +++static inline void emit_a32_add_r(const u8 dst, const u8 src, +++ const bool is64, const bool hi, +++ struct jit_ctx *ctx) { +++ /* 64 bit : +++ * adds dst_lo, dst_lo, src_lo +++ * adc dst_hi, dst_hi, src_hi +++ * 32 bit : +++ * add dst_lo, dst_lo, src_lo ++ */ ++- imm = ctx->offsets[tgt] + ctx->prologue_bytes - (ctx->idx * 4 + 8); +++ if (!hi && is64) +++ emit(ARM_ADDS_R(dst, dst, src), ctx); +++ else if (hi && is64) +++ emit(ARM_ADC_R(dst, dst, src), ctx); +++ else +++ emit(ARM_ADD_R(dst, dst, src), ctx); +++} ++ ++- return imm >> 2; +++static inline void emit_a32_sub_r(const u8 dst, const u8 src, +++ const bool is64, const bool hi, +++ struct jit_ctx *ctx) { +++ /* 64 bit : +++ * subs dst_lo, dst_lo, src_lo +++ * sbc dst_hi, dst_hi, src_hi +++ * 32 bit : +++ * sub dst_lo, dst_lo, src_lo +++ */ +++ if (!hi && is64) +++ emit(ARM_SUBS_R(dst, dst, src), ctx); +++ else if (hi && is64) +++ emit(ARM_SBC_R(dst, dst, src), ctx); +++ else +++ emit(ARM_SUB_R(dst, dst, src), ctx); +++} +++ +++static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64, +++ const bool hi, const u8 op, struct jit_ctx *ctx){ +++ switch (BPF_OP(op)) { +++ /* dst = dst + src */ +++ case BPF_ADD: +++ emit_a32_add_r(dst, src, is64, hi, ctx); +++ break; +++ /* dst = dst - src */ +++ case BPF_SUB: +++ emit_a32_sub_r(dst, src, is64, hi, ctx); +++ break; +++ /* dst = 
dst | src */ +++ case BPF_OR: +++ emit(ARM_ORR_R(dst, dst, src), ctx); +++ break; +++ /* dst = dst & src */ +++ case BPF_AND: +++ emit(ARM_AND_R(dst, dst, src), ctx); +++ break; +++ /* dst = dst ^ src */ +++ case BPF_XOR: +++ emit(ARM_EOR_R(dst, dst, src), ctx); +++ break; +++ /* dst = dst * src */ +++ case BPF_MUL: +++ emit(ARM_MUL(dst, dst, src), ctx); +++ break; +++ /* dst = dst << src */ +++ case BPF_LSH: +++ emit(ARM_LSL_R(dst, dst, src), ctx); +++ break; +++ /* dst = dst >> src */ +++ case BPF_RSH: +++ emit(ARM_LSR_R(dst, dst, src), ctx); +++ break; +++ /* dst = dst >> src (signed)*/ +++ case BPF_ARSH: +++ emit(ARM_MOV_SR(dst, dst, SRTYPE_ASR, src), ctx); +++ break; +++ } ++ } ++ ++-#define OP_IMM3(op, r1, r2, imm_val, ctx) \ ++- do { \ ++- imm12 = imm8m(imm_val); \ ++- if (imm12 < 0) { \ ++- emit_mov_i_no8m(r_scratch, imm_val, ctx); \ ++- emit(op ## _R((r1), (r2), r_scratch), ctx); \ ++- } else { \ ++- emit(op ## _I((r1), (r2), imm12), ctx); \ ++- } \ ++- } while (0) ++- ++-static inline void emit_err_ret(u8 cond, struct jit_ctx *ctx) ++-{ ++- if (ctx->ret0_fp_idx >= 0) { ++- _emit(cond, ARM_B(b_imm(ctx->ret0_fp_idx, ctx)), ctx); ++- /* NOP to keep the size constant between passes */ ++- emit(ARM_MOV_R(ARM_R0, ARM_R0), ctx); +++/* ALU operation (32 bit) +++ * dst = dst (op) src +++ */ +++static inline void emit_a32_alu_r(const s8 dst, const s8 src, +++ struct jit_ctx *ctx, const bool is64, +++ const bool hi, const u8 op) { +++ const s8 *tmp = bpf2a32[TMP_REG_1]; +++ s8 rn, rd; +++ +++ rn = arm_bpf_get_reg32(src, tmp[1], ctx); +++ rd = arm_bpf_get_reg32(dst, tmp[0], ctx); +++ /* ALU operation */ +++ emit_alu_r(rd, rn, is64, hi, op, ctx); +++ arm_bpf_put_reg32(dst, rd, ctx); +++} +++ +++/* ALU operation (64 bit) */ +++static inline void emit_a32_alu_r64(const bool is64, const s8 dst[], +++ const s8 src[], struct jit_ctx *ctx, +++ const u8 op) { +++ const s8 *tmp = bpf2a32[TMP_REG_1]; +++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; +++ const s8 *rd; +++ +++ rd = arm_bpf_get_reg64(dst, tmp, ctx); +++ if (is64) { +++ const s8 *rs; +++ +++ rs = arm_bpf_get_reg64(src, tmp2, ctx); +++ +++ /* ALU operation */ +++ emit_alu_r(rd[1], rs[1], true, false, op, ctx); +++ emit_alu_r(rd[0], rs[0], true, true, op, ctx); ++ } else { ++- _emit(cond, ARM_MOV_I(ARM_R0, 0), ctx); ++- _emit(cond, ARM_B(b_imm(ctx->skf->len, ctx)), ctx); +++ s8 rs; +++ +++ rs = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); +++ +++ /* ALU operation */ +++ emit_alu_r(rd[1], rs, true, false, op, ctx); +++ if (!ctx->prog->aux->verifier_zext) +++ emit_a32_mov_i(rd[0], 0, ctx); ++ } +++ +++ arm_bpf_put_reg64(dst, rd, ctx); ++ } ++ ++-static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) ++-{ ++-#if __LINUX_ARM_ARCH__ < 5 ++- emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); +++/* dst = src (4 bytes)*/ +++static inline void emit_a32_mov_r(const s8 dst, const s8 src, +++ struct jit_ctx *ctx) { +++ const s8 *tmp = bpf2a32[TMP_REG_1]; +++ s8 rt; +++ +++ rt = arm_bpf_get_reg32(src, tmp[0], ctx); +++ arm_bpf_put_reg32(dst, rt, ctx); +++} +++ +++/* dst = src */ +++static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], +++ const s8 src[], +++ struct jit_ctx *ctx) { +++ if (!is64) { +++ emit_a32_mov_r(dst_lo, src_lo, ctx); +++ if (!ctx->prog->aux->verifier_zext) +++ /* Zero out high 4 bytes */ +++ emit_a32_mov_i(dst_hi, 0, ctx); +++ } else if (__LINUX_ARM_ARCH__ < 6 && +++ ctx->cpu_architecture < CPU_ARCH_ARMv5TE) { +++ /* complete 8 byte move */ +++ emit_a32_mov_r(dst_lo, src_lo, ctx); +++ emit_a32_mov_r(dst_hi, src_hi, ctx); +++ } else if 
(is_stacked(src_lo) && is_stacked(dst_lo)) { +++ const u8 *tmp = bpf2a32[TMP_REG_1]; +++ +++ emit(ARM_LDRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx); +++ emit(ARM_STRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx); +++ } else if (is_stacked(src_lo)) { +++ emit(ARM_LDRD_I(dst[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx); +++ } else if (is_stacked(dst_lo)) { +++ emit(ARM_STRD_I(src[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx); +++ } else { +++ emit(ARM_MOV_R(dst[0], src[0]), ctx); +++ emit(ARM_MOV_R(dst[1], src[1]), ctx); +++ } +++} ++ ++- if (elf_hwcap & HWCAP_THUMB) ++- emit(ARM_BX(tgt_reg), ctx); ++- else ++- emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx); ++-#else ++- emit(ARM_BLX_R(tgt_reg), ctx); ++-#endif +++/* Shift operations */ +++static inline void emit_a32_alu_i(const s8 dst, const u32 val, +++ struct jit_ctx *ctx, const u8 op) { +++ const s8 *tmp = bpf2a32[TMP_REG_1]; +++ s8 rd; +++ +++ rd = arm_bpf_get_reg32(dst, tmp[0], ctx); +++ +++ /* Do shift operation */ +++ switch (op) { +++ case BPF_LSH: +++ emit(ARM_LSL_I(rd, rd, val), ctx); +++ break; +++ case BPF_RSH: +++ emit(ARM_LSR_I(rd, rd, val), ctx); +++ break; +++ case BPF_NEG: +++ emit(ARM_RSB_I(rd, rd, val), ctx); +++ break; +++ } +++ +++ arm_bpf_put_reg32(dst, rd, ctx); ++ } ++ ++-static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, ++- int bpf_op) ++-{ ++-#if __LINUX_ARM_ARCH__ == 7 ++- if (elf_hwcap & HWCAP_IDIVA) { ++- if (bpf_op == BPF_DIV) ++- emit(ARM_UDIV(rd, rm, rn), ctx); ++- else { ++- emit(ARM_UDIV(ARM_R3, rm, rn), ctx); ++- emit(ARM_MLS(rd, rn, ARM_R3, rm), ctx); +++/* dst = ~dst (64 bit) */ +++static inline void emit_a32_neg64(const s8 dst[], +++ struct jit_ctx *ctx){ +++ const s8 *tmp = bpf2a32[TMP_REG_1]; +++ const s8 *rd; +++ +++ /* Setup Operand */ +++ rd = arm_bpf_get_reg64(dst, tmp, ctx); +++ +++ /* Do Negate Operation */ +++ emit(ARM_RSBS_I(rd[1], rd[1], 0), ctx); +++ emit(ARM_RSC_I(rd[0], rd[0], 0), ctx); +++ +++ arm_bpf_put_reg64(dst, rd, ctx); +++} +++ +++/* dst = dst << src */ +++static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[], +++ struct jit_ctx *ctx) { +++ const s8 *tmp = bpf2a32[TMP_REG_1]; +++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; +++ const s8 *rd; +++ s8 rt; +++ +++ /* Setup Operands */ +++ rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); +++ rd = arm_bpf_get_reg64(dst, tmp, ctx); +++ +++ /* Do LSH operation */ +++ emit(ARM_SUB_I(ARM_IP, rt, 32), ctx); +++ emit(ARM_RSB_I(tmp2[0], rt, 32), ctx); +++ emit(ARM_MOV_SR(ARM_LR, rd[0], SRTYPE_ASL, rt), ctx); +++ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[1], SRTYPE_ASL, ARM_IP), ctx); +++ emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd[1], SRTYPE_LSR, tmp2[0]), ctx); +++ emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_ASL, rt), ctx); +++ +++ arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); +++ arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); +++} +++ +++/* dst = dst >> src (signed)*/ +++static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], +++ struct jit_ctx *ctx) { +++ const s8 *tmp = bpf2a32[TMP_REG_1]; +++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; +++ const s8 *rd; +++ s8 rt; +++ +++ /* Setup Operands */ +++ rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); +++ rd = arm_bpf_get_reg64(dst, tmp, ctx); +++ +++ /* Do the ARSH operation */ +++ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); +++ emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); +++ emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx); +++ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx); +++ _emit(ARM_COND_MI, ARM_B(0), ctx); +++ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], 
SRTYPE_ASR, tmp2[0]), ctx); +++ emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_ASR, rt), ctx); +++ +++ arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); +++ arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); +++} +++ +++/* dst = dst >> src */ +++static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[], +++ struct jit_ctx *ctx) { +++ const s8 *tmp = bpf2a32[TMP_REG_1]; +++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; +++ const s8 *rd; +++ s8 rt; +++ +++ /* Setup Operands */ +++ rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); +++ rd = arm_bpf_get_reg64(dst, tmp, ctx); +++ +++ /* Do RSH operation */ +++ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); +++ emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); +++ emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx); +++ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx); +++ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_LSR, tmp2[0]), ctx); +++ emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_LSR, rt), ctx); +++ +++ arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); +++ arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); +++} +++ +++/* dst = dst << val */ +++static inline void emit_a32_lsh_i64(const s8 dst[], +++ const u32 val, struct jit_ctx *ctx){ +++ const s8 *tmp = bpf2a32[TMP_REG_1]; +++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; +++ const s8 *rd; +++ +++ /* Setup operands */ +++ rd = arm_bpf_get_reg64(dst, tmp, ctx); +++ +++ /* Do LSH operation */ +++ if (val < 32) { +++ emit(ARM_MOV_SI(tmp2[0], rd[0], SRTYPE_ASL, val), ctx); +++ emit(ARM_ORR_SI(rd[0], tmp2[0], rd[1], SRTYPE_LSR, 32 - val), ctx); +++ emit(ARM_MOV_SI(rd[1], rd[1], SRTYPE_ASL, val), ctx); +++ } else { +++ if (val == 32) +++ emit(ARM_MOV_R(rd[0], rd[1]), ctx); +++ else +++ emit(ARM_MOV_SI(rd[0], rd[1], SRTYPE_ASL, val - 32), ctx); +++ emit(ARM_EOR_R(rd[1], rd[1], rd[1]), ctx); +++ } +++ +++ arm_bpf_put_reg64(dst, rd, ctx); +++} +++ +++/* dst = dst >> val */ +++static inline void emit_a32_rsh_i64(const s8 dst[], +++ const u32 val, struct jit_ctx *ctx) { +++ const s8 *tmp = bpf2a32[TMP_REG_1]; +++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; +++ const s8 *rd; +++ +++ /* Setup operands */ +++ rd = arm_bpf_get_reg64(dst, tmp, ctx); +++ +++ /* Do LSR operation */ +++ if (val == 0) { +++ /* An immediate value of 0 encodes a shift amount of 32 +++ * for LSR. To shift by 0, don't do anything. +++ */ +++ } else if (val < 32) { +++ emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); +++ emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); +++ emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_LSR, val), ctx); +++ } else if (val == 32) { +++ emit(ARM_MOV_R(rd[1], rd[0]), ctx); +++ emit(ARM_MOV_I(rd[0], 0), ctx); +++ } else { +++ emit(ARM_MOV_SI(rd[1], rd[0], SRTYPE_LSR, val - 32), ctx); +++ emit(ARM_MOV_I(rd[0], 0), ctx); +++ } +++ +++ arm_bpf_put_reg64(dst, rd, ctx); +++} +++ +++/* dst = dst >> val (signed) */ +++static inline void emit_a32_arsh_i64(const s8 dst[], +++ const u32 val, struct jit_ctx *ctx){ +++ const s8 *tmp = bpf2a32[TMP_REG_1]; +++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; +++ const s8 *rd; +++ +++ /* Setup operands */ +++ rd = arm_bpf_get_reg64(dst, tmp, ctx); +++ +++ /* Do ARSH operation */ +++ if (val == 0) { +++ /* An immediate value of 0 encodes a shift amount of 32 +++ * for ASR. To shift by 0, don't do anything. 
+++ */ +++ } else if (val < 32) { +++ emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); +++ emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); +++ emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, val), ctx); +++ } else if (val == 32) { +++ emit(ARM_MOV_R(rd[1], rd[0]), ctx); +++ emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, 31), ctx); +++ } else { +++ emit(ARM_MOV_SI(rd[1], rd[0], SRTYPE_ASR, val - 32), ctx); +++ emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, 31), ctx); +++ } +++ +++ arm_bpf_put_reg64(dst, rd, ctx); +++} +++ +++static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], +++ struct jit_ctx *ctx) { +++ const s8 *tmp = bpf2a32[TMP_REG_1]; +++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; +++ const s8 *rd, *rt; +++ +++ /* Setup operands for multiplication */ +++ rd = arm_bpf_get_reg64(dst, tmp, ctx); +++ rt = arm_bpf_get_reg64(src, tmp2, ctx); +++ +++ /* Do Multiplication */ +++ emit(ARM_MUL(ARM_IP, rd[1], rt[0]), ctx); +++ emit(ARM_MUL(ARM_LR, rd[0], rt[1]), ctx); +++ emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx); +++ +++ emit(ARM_UMULL(ARM_IP, rd[0], rd[1], rt[1]), ctx); +++ emit(ARM_ADD_R(rd[0], ARM_LR, rd[0]), ctx); +++ +++ arm_bpf_put_reg32(dst_lo, ARM_IP, ctx); +++ arm_bpf_put_reg32(dst_hi, rd[0], ctx); +++} +++ +++static bool is_ldst_imm(s16 off, const u8 size) +++{ +++ s16 off_max = 0; +++ +++ switch (size) { +++ case BPF_B: +++ case BPF_W: +++ off_max = 0xfff; +++ break; +++ case BPF_H: +++ off_max = 0xff; +++ break; +++ case BPF_DW: +++ /* Need to make sure off+4 does not overflow. */ +++ off_max = 0xfff - 4; +++ break; +++ } +++ return -off_max <= off && off <= off_max; +++} +++ +++/* *(size *)(dst + off) = src */ +++static inline void emit_str_r(const s8 dst, const s8 src[], +++ s16 off, struct jit_ctx *ctx, const u8 sz){ +++ const s8 *tmp = bpf2a32[TMP_REG_1]; +++ s8 rd; +++ +++ rd = arm_bpf_get_reg32(dst, tmp[1], ctx); +++ +++ if (!is_ldst_imm(off, sz)) { +++ emit_a32_mov_i(tmp[0], off, ctx); +++ emit(ARM_ADD_R(tmp[0], tmp[0], rd), ctx); +++ rd = tmp[0]; +++ off = 0; +++ } +++ switch (sz) { +++ case BPF_B: +++ /* Store a Byte */ +++ emit(ARM_STRB_I(src_lo, rd, off), ctx); +++ break; +++ case BPF_H: +++ /* Store a HalfWord */ +++ emit(ARM_STRH_I(src_lo, rd, off), ctx); +++ break; +++ case BPF_W: +++ /* Store a Word */ +++ emit(ARM_STR_I(src_lo, rd, off), ctx); +++ break; +++ case BPF_DW: +++ /* Store a Double Word */ +++ emit(ARM_STR_I(src_lo, rd, off), ctx); +++ emit(ARM_STR_I(src_hi, rd, off + 4), ctx); +++ break; +++ } +++} +++ +++/* dst = *(size*)(src + off) */ +++static inline void emit_ldx_r(const s8 dst[], const s8 src, +++ s16 off, struct jit_ctx *ctx, const u8 sz){ +++ const s8 *tmp = bpf2a32[TMP_REG_1]; +++ const s8 *rd = is_stacked(dst_lo) ? 
tmp : dst; +++ s8 rm = src; +++ +++ if (!is_ldst_imm(off, sz)) { +++ emit_a32_mov_i(tmp[0], off, ctx); +++ emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); +++ rm = tmp[0]; +++ off = 0; +++ } else if (rd[1] == rm) { +++ emit(ARM_MOV_R(tmp[0], rm), ctx); +++ rm = tmp[0]; +++ } +++ switch (sz) { +++ case BPF_B: +++ /* Load a Byte */ +++ emit(ARM_LDRB_I(rd[1], rm, off), ctx); +++ if (!ctx->prog->aux->verifier_zext) +++ emit_a32_mov_i(rd[0], 0, ctx); +++ break; +++ case BPF_H: +++ /* Load a HalfWord */ +++ emit(ARM_LDRH_I(rd[1], rm, off), ctx); +++ if (!ctx->prog->aux->verifier_zext) +++ emit_a32_mov_i(rd[0], 0, ctx); +++ break; +++ case BPF_W: +++ /* Load a Word */ +++ emit(ARM_LDR_I(rd[1], rm, off), ctx); +++ if (!ctx->prog->aux->verifier_zext) +++ emit_a32_mov_i(rd[0], 0, ctx); +++ break; +++ case BPF_DW: +++ /* Load a Double Word */ +++ emit(ARM_LDR_I(rd[1], rm, off), ctx); +++ emit(ARM_LDR_I(rd[0], rm, off + 4), ctx); +++ break; +++ } +++ arm_bpf_put_reg64(dst, rd, ctx); +++} +++ +++/* Arithmatic Operation */ +++static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm, +++ const u8 rn, struct jit_ctx *ctx, u8 op, +++ bool is_jmp64) { +++ switch (op) { +++ case BPF_JSET: +++ if (is_jmp64) { +++ emit(ARM_AND_R(ARM_IP, rt, rn), ctx); +++ emit(ARM_AND_R(ARM_LR, rd, rm), ctx); +++ emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx); +++ } else { +++ emit(ARM_ANDS_R(ARM_IP, rt, rn), ctx); ++ } ++- return; +++ break; +++ case BPF_JEQ: +++ case BPF_JNE: +++ case BPF_JGT: +++ case BPF_JGE: +++ case BPF_JLE: +++ case BPF_JLT: +++ if (is_jmp64) { +++ emit(ARM_CMP_R(rd, rm), ctx); +++ /* Only compare low halve if high halve are equal. */ +++ _emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx); +++ } else { +++ emit(ARM_CMP_R(rt, rn), ctx); +++ } +++ break; +++ case BPF_JSLE: +++ case BPF_JSGT: +++ emit(ARM_CMP_R(rn, rt), ctx); +++ if (is_jmp64) +++ emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx); +++ break; +++ case BPF_JSLT: +++ case BPF_JSGE: +++ emit(ARM_CMP_R(rt, rn), ctx); +++ if (is_jmp64) +++ emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx); +++ break; ++ } ++-#endif +++} ++ ++- /* ++- * For BPF_ALU | BPF_DIV | BPF_K instructions, rm is ARM_R4 ++- * (r_A) and rn is ARM_R0 (r_scratch) so load rn first into ++- * ARM_R1 to avoid accidentally overwriting ARM_R0 with rm ++- * before using it as a source for ARM_R1. ++- * ++- * For BPF_ALU | BPF_DIV | BPF_X rm is ARM_R4 (r_A) and rn is ++- * ARM_R5 (r_X) so there is no particular register overlap ++- * issues. 
+++static int out_offset = -1; /* initialized on the first pass of build_body() */ +++static int emit_bpf_tail_call(struct jit_ctx *ctx) +++{ +++ +++ /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */ +++ const s8 *r2 = bpf2a32[BPF_REG_2]; +++ const s8 *r3 = bpf2a32[BPF_REG_3]; +++ const s8 *tmp = bpf2a32[TMP_REG_1]; +++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; +++ const s8 *tcc = bpf2a32[TCALL_CNT]; +++ const s8 *tc; +++ const int idx0 = ctx->idx; +++#define cur_offset (ctx->idx - idx0) +++#define jmp_offset (out_offset - (cur_offset) - 2) +++ u32 lo, hi; +++ s8 r_array, r_index; +++ int off; +++ +++ /* if (index >= array->map.max_entries) +++ * goto out; ++ */ ++- if (rn != ARM_R1) ++- emit(ARM_MOV_R(ARM_R1, rn), ctx); ++- if (rm != ARM_R0) ++- emit(ARM_MOV_R(ARM_R0, rm), ctx); +++ BUILD_BUG_ON(offsetof(struct bpf_array, map.max_entries) > +++ ARM_INST_LDST__IMM12); +++ off = offsetof(struct bpf_array, map.max_entries); +++ r_array = arm_bpf_get_reg32(r2[1], tmp2[0], ctx); +++ /* index is 32-bit for arrays */ +++ r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx); +++ /* array->map.max_entries */ +++ emit(ARM_LDR_I(tmp[1], r_array, off), ctx); +++ /* index >= array->map.max_entries */ +++ emit(ARM_CMP_R(r_index, tmp[1]), ctx); +++ _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx); +++ +++ /* tmp2[0] = array, tmp2[1] = index */ +++ +++ /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) +++ * goto out; +++ * tail_call_cnt++; +++ */ +++ lo = (u32)MAX_TAIL_CALL_CNT; +++ hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32); +++ tc = arm_bpf_get_reg64(tcc, tmp, ctx); +++ emit(ARM_CMP_I(tc[0], hi), ctx); +++ _emit(ARM_COND_EQ, ARM_CMP_I(tc[1], lo), ctx); +++ _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx); +++ emit(ARM_ADDS_I(tc[1], tc[1], 1), ctx); +++ emit(ARM_ADC_I(tc[0], tc[0], 0), ctx); +++ arm_bpf_put_reg64(tcc, tmp, ctx); +++ +++ /* prog = array->ptrs[index] +++ * if (prog == NULL) +++ * goto out; +++ */ +++ BUILD_BUG_ON(imm8m(offsetof(struct bpf_array, ptrs)) < 0); +++ off = imm8m(offsetof(struct bpf_array, ptrs)); +++ emit(ARM_ADD_I(tmp[1], r_array, off), ctx); +++ emit(ARM_LDR_R_SI(tmp[1], tmp[1], r_index, SRTYPE_ASL, 2), ctx); +++ emit(ARM_CMP_I(tmp[1], 0), ctx); +++ _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); +++ +++ /* goto *(prog->bpf_func + prologue_size); */ +++ BUILD_BUG_ON(offsetof(struct bpf_prog, bpf_func) > +++ ARM_INST_LDST__IMM12); +++ off = offsetof(struct bpf_prog, bpf_func); +++ emit(ARM_LDR_I(tmp[1], tmp[1], off), ctx); +++ emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx); +++ emit_bx_r(tmp[1], ctx); +++ +++ /* out: */ +++ if (out_offset == -1) +++ out_offset = cur_offset; +++ if (cur_offset != out_offset) { +++ pr_err_once("tail_call out_offset = %d, expected %d!\n", +++ cur_offset, out_offset); +++ return -1; +++ } +++ return 0; +++#undef cur_offset +++#undef jmp_offset +++} +++ +++/* 0xabcd => 0xcdab */ +++static inline void emit_rev16(const u8 rd, const u8 rn, struct jit_ctx *ctx) +++{ +++#if __LINUX_ARM_ARCH__ < 6 +++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; ++ ++- ctx->seen |= SEEN_CALL; ++- emit_mov_i(ARM_R3, bpf_op == BPF_DIV ? 
(u32)jit_udiv : (u32)jit_mod, ++- ctx); ++- emit_blx_r(ARM_R3, ctx); +++ emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx); +++ emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 8), ctx); +++ emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx); +++ emit(ARM_ORR_SI(rd, tmp2[0], tmp2[1], SRTYPE_LSL, 8), ctx); +++#else /* ARMv6+ */ +++ emit(ARM_REV16(rd, rn), ctx); +++#endif +++} ++ ++- if (rd != ARM_R0) ++- emit(ARM_MOV_R(rd, ARM_R0), ctx); +++/* 0xabcdefgh => 0xghefcdab */ +++static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx) +++{ +++#if __LINUX_ARM_ARCH__ < 6 +++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; +++ +++ emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx); +++ emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 24), ctx); +++ emit(ARM_ORR_SI(ARM_IP, tmp2[0], tmp2[1], SRTYPE_LSL, 24), ctx); +++ +++ emit(ARM_MOV_SI(tmp2[1], rn, SRTYPE_LSR, 8), ctx); +++ emit(ARM_AND_I(tmp2[1], tmp2[1], 0xff), ctx); +++ emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 16), ctx); +++ emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx); +++ emit(ARM_MOV_SI(tmp2[0], tmp2[0], SRTYPE_LSL, 8), ctx); +++ emit(ARM_ORR_SI(tmp2[0], tmp2[0], tmp2[1], SRTYPE_LSL, 16), ctx); +++ emit(ARM_ORR_R(rd, ARM_IP, tmp2[0]), ctx); +++ +++#else /* ARMv6+ */ +++ emit(ARM_REV(rd, rn), ctx); +++#endif ++ } ++ ++-static inline void update_on_xread(struct jit_ctx *ctx) +++// push the scratch stack register on top of the stack +++static inline void emit_push_r64(const s8 src[], struct jit_ctx *ctx) ++ { ++- if (!(ctx->seen & SEEN_X)) ++- ctx->flags |= FLAG_NEED_X_RESET; +++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; +++ const s8 *rt; +++ u16 reg_set = 0; +++ +++ rt = arm_bpf_get_reg64(src, tmp2, ctx); ++ ++- ctx->seen |= SEEN_X; +++ reg_set = (1 << rt[1]) | (1 << rt[0]); +++ emit(ARM_PUSH(reg_set), ctx); ++ } ++ ++-static int build_body(struct jit_ctx *ctx) +++static void build_prologue(struct jit_ctx *ctx) ++ { ++- void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w}; ++- const struct bpf_prog *prog = ctx->skf; ++- const struct sock_filter *inst; ++- unsigned i, load_order, off, condt; ++- int imm12; ++- u32 k; +++ const s8 r0 = bpf2a32[BPF_REG_0][1]; +++ const s8 r2 = bpf2a32[BPF_REG_1][1]; +++ const s8 r3 = bpf2a32[BPF_REG_1][0]; +++ const s8 r4 = bpf2a32[BPF_REG_6][1]; +++ const s8 fplo = bpf2a32[BPF_REG_FP][1]; +++ const s8 fphi = bpf2a32[BPF_REG_FP][0]; +++ const s8 *tcc = bpf2a32[TCALL_CNT]; ++ ++- for (i = 0; i < prog->len; i++) { ++- u16 code; +++ /* Save callee saved registers. 
*/ +++#ifdef CONFIG_FRAME_POINTER +++ u16 reg_set = CALLEE_PUSH_MASK | 1 << ARM_IP | 1 << ARM_PC; +++ emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx); +++ emit(ARM_PUSH(reg_set), ctx); +++ emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx); +++#else +++ emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx); +++ emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx); +++#endif +++ /* Save frame pointer for later */ +++ emit(ARM_SUB_I(ARM_IP, ARM_SP, SCRATCH_SIZE), ctx); ++ ++- inst = &(prog->insns[i]); ++- /* K as an immediate value operand */ ++- k = inst->k; ++- code = bpf_anc_helper(inst); +++ ctx->stack_size = imm8m(STACK_SIZE); ++ ++- /* compute offsets only in the fake pass */ ++- if (ctx->target == NULL) ++- ctx->offsets[i] = ctx->idx * 4; +++ /* Set up function call stack */ +++ emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx); ++ ++- switch (code) { ++- case BPF_LD | BPF_IMM: ++- emit_mov_i(r_A, k, ctx); ++- break; ++- case BPF_LD | BPF_W | BPF_LEN: ++- ctx->seen |= SEEN_SKB; ++- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); ++- emit(ARM_LDR_I(r_A, r_skb, ++- offsetof(struct sk_buff, len)), ctx); ++- break; ++- case BPF_LD | BPF_MEM: ++- /* A = scratch[k] */ ++- ctx->seen |= SEEN_MEM_WORD(k); ++- emit(ARM_LDR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx); ++- break; ++- case BPF_LD | BPF_W | BPF_ABS: ++- load_order = 2; ++- goto load; ++- case BPF_LD | BPF_H | BPF_ABS: ++- load_order = 1; ++- goto load; ++- case BPF_LD | BPF_B | BPF_ABS: ++- load_order = 0; ++-load: ++- emit_mov_i(r_off, k, ctx); ++-load_common: ++- ctx->seen |= SEEN_DATA | SEEN_CALL; ++- ++- if (load_order > 0) { ++- emit(ARM_SUB_I(r_scratch, r_skb_hl, ++- 1 << load_order), ctx); ++- emit(ARM_CMP_R(r_scratch, r_off), ctx); ++- condt = ARM_COND_GE; ++- } else { ++- emit(ARM_CMP_R(r_skb_hl, r_off), ctx); ++- condt = ARM_COND_HI; ++- } +++ /* Set up BPF prog stack base register */ +++ emit_a32_mov_r(fplo, ARM_IP, ctx); +++ emit_a32_mov_i(fphi, 0, ctx); ++ ++- /* ++- * test for negative offset, only if we are ++- * currently scheduled to take the fast ++- * path. this will update the flags so that ++- * the slowpath instruction are ignored if the ++- * offset is negative. ++- * ++- * for loard_order == 0 the HI condition will ++- * make loads at offset 0 take the slow path too. 
++- */ ++- _emit(condt, ARM_CMP_I(r_off, 0), ctx); +++ /* mov r4, 0 */ +++ emit(ARM_MOV_I(r4, 0), ctx); ++ ++- _emit(condt, ARM_ADD_R(r_scratch, r_off, r_skb_data), ++- ctx); +++ /* Move BPF_CTX to BPF_R1 */ +++ emit(ARM_MOV_R(r3, r4), ctx); +++ emit(ARM_MOV_R(r2, r0), ctx); +++ /* Initialize Tail Count */ +++ emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[0])), ctx); +++ emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[1])), ctx); +++ /* end of prologue */ +++} ++ ++- if (load_order == 0) ++- _emit(condt, ARM_LDRB_I(r_A, r_scratch, 0), ++- ctx); ++- else if (load_order == 1) ++- emit_load_be16(condt, r_A, r_scratch, ctx); ++- else if (load_order == 2) ++- emit_load_be32(condt, r_A, r_scratch, ctx); ++- ++- _emit(condt, ARM_B(b_imm(i + 1, ctx)), ctx); ++- ++- /* the slowpath */ ++- emit_mov_i(ARM_R3, (u32)load_func[load_order], ctx); ++- emit(ARM_MOV_R(ARM_R0, r_skb), ctx); ++- /* the offset is already in R1 */ ++- emit_blx_r(ARM_R3, ctx); ++- /* check the result of skb_copy_bits */ ++- emit(ARM_CMP_I(ARM_R1, 0), ctx); ++- emit_err_ret(ARM_COND_NE, ctx); ++- emit(ARM_MOV_R(r_A, ARM_R0), ctx); ++- break; ++- case BPF_LD | BPF_W | BPF_IND: ++- load_order = 2; ++- goto load_ind; ++- case BPF_LD | BPF_H | BPF_IND: ++- load_order = 1; ++- goto load_ind; ++- case BPF_LD | BPF_B | BPF_IND: ++- load_order = 0; ++-load_ind: ++- update_on_xread(ctx); ++- OP_IMM3(ARM_ADD, r_off, r_X, k, ctx); ++- goto load_common; ++- case BPF_LDX | BPF_IMM: ++- ctx->seen |= SEEN_X; ++- emit_mov_i(r_X, k, ctx); ++- break; ++- case BPF_LDX | BPF_W | BPF_LEN: ++- ctx->seen |= SEEN_X | SEEN_SKB; ++- emit(ARM_LDR_I(r_X, r_skb, ++- offsetof(struct sk_buff, len)), ctx); ++- break; ++- case BPF_LDX | BPF_MEM: ++- ctx->seen |= SEEN_X | SEEN_MEM_WORD(k); ++- emit(ARM_LDR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx); ++- break; ++- case BPF_LDX | BPF_B | BPF_MSH: ++- /* x = ((*(frame + k)) & 0xf) << 2; */ ++- ctx->seen |= SEEN_X | SEEN_DATA | SEEN_CALL; ++- /* the interpreter should deal with the negative K */ ++- if ((int)k < 0) ++- return -1; ++- /* offset in r1: we might have to take the slow path */ ++- emit_mov_i(r_off, k, ctx); ++- emit(ARM_CMP_R(r_skb_hl, r_off), ctx); ++- ++- /* load in r0: common with the slowpath */ ++- _emit(ARM_COND_HI, ARM_LDRB_R(ARM_R0, r_skb_data, ++- ARM_R1), ctx); ++- /* ++- * emit_mov_i() might generate one or two instructions, ++- * the same holds for emit_blx_r() ++- */ ++- _emit(ARM_COND_HI, ARM_B(b_imm(i + 1, ctx) - 2), ctx); +++/* restore callee saved registers. */ +++static void build_epilogue(struct jit_ctx *ctx) +++{ +++#ifdef CONFIG_FRAME_POINTER +++ /* When using frame pointers, some additional registers need to +++ * be loaded. */ +++ u16 reg_set = CALLEE_POP_MASK | 1 << ARM_SP; +++ emit(ARM_SUB_I(ARM_SP, ARM_FP, hweight16(reg_set) * 4), ctx); +++ emit(ARM_LDM(ARM_SP, reg_set), ctx); +++#else +++ /* Restore callee saved registers. 
*/ +++ emit(ARM_MOV_R(ARM_SP, ARM_FP), ctx); +++ emit(ARM_POP(CALLEE_POP_MASK), ctx); +++#endif +++} ++ ++- emit(ARM_MOV_R(ARM_R0, r_skb), ctx); ++- /* r_off is r1 */ ++- emit_mov_i(ARM_R3, (u32)jit_get_skb_b, ctx); ++- emit_blx_r(ARM_R3, ctx); ++- /* check the return value of skb_copy_bits */ ++- emit(ARM_CMP_I(ARM_R1, 0), ctx); ++- emit_err_ret(ARM_COND_NE, ctx); ++- ++- emit(ARM_AND_I(r_X, ARM_R0, 0x00f), ctx); ++- emit(ARM_LSL_I(r_X, r_X, 2), ctx); ++- break; ++- case BPF_ST: ++- ctx->seen |= SEEN_MEM_WORD(k); ++- emit(ARM_STR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx); ++- break; ++- case BPF_STX: ++- update_on_xread(ctx); ++- ctx->seen |= SEEN_MEM_WORD(k); ++- emit(ARM_STR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx); ++- break; ++- case BPF_ALU | BPF_ADD | BPF_K: ++- /* A += K */ ++- OP_IMM3(ARM_ADD, r_A, r_A, k, ctx); ++- break; ++- case BPF_ALU | BPF_ADD | BPF_X: ++- update_on_xread(ctx); ++- emit(ARM_ADD_R(r_A, r_A, r_X), ctx); ++- break; ++- case BPF_ALU | BPF_SUB | BPF_K: ++- /* A -= K */ ++- OP_IMM3(ARM_SUB, r_A, r_A, k, ctx); ++- break; ++- case BPF_ALU | BPF_SUB | BPF_X: ++- update_on_xread(ctx); ++- emit(ARM_SUB_R(r_A, r_A, r_X), ctx); ++- break; ++- case BPF_ALU | BPF_MUL | BPF_K: ++- /* A *= K */ ++- emit_mov_i(r_scratch, k, ctx); ++- emit(ARM_MUL(r_A, r_A, r_scratch), ctx); ++- break; ++- case BPF_ALU | BPF_MUL | BPF_X: ++- update_on_xread(ctx); ++- emit(ARM_MUL(r_A, r_A, r_X), ctx); ++- break; ++- case BPF_ALU | BPF_DIV | BPF_K: ++- if (k == 1) ++- break; ++- emit_mov_i(r_scratch, k, ctx); ++- emit_udivmod(r_A, r_A, r_scratch, ctx, BPF_DIV); ++- break; ++- case BPF_ALU | BPF_DIV | BPF_X: ++- update_on_xread(ctx); ++- emit(ARM_CMP_I(r_X, 0), ctx); ++- emit_err_ret(ARM_COND_EQ, ctx); ++- emit_udivmod(r_A, r_A, r_X, ctx, BPF_DIV); ++- break; ++- case BPF_ALU | BPF_MOD | BPF_K: ++- if (k == 1) { ++- emit_mov_i(r_A, 0, ctx); +++/* +++ * Convert an eBPF instruction to native instruction, i.e +++ * JITs an eBPF instruction. +++ * Returns : +++ * 0 - Successfully JITed an 8-byte eBPF instruction +++ * >0 - Successfully JITed a 16-byte eBPF instruction +++ * <0 - Failed to JIT. 
+++ */ +++static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) +++{ +++ const u8 code = insn->code; +++ const s8 *dst = bpf2a32[insn->dst_reg]; +++ const s8 *src = bpf2a32[insn->src_reg]; +++ const s8 *tmp = bpf2a32[TMP_REG_1]; +++ const s8 *tmp2 = bpf2a32[TMP_REG_2]; +++ const s16 off = insn->off; +++ const s32 imm = insn->imm; +++ const int i = insn - ctx->prog->insnsi; +++ const bool is64 = BPF_CLASS(code) == BPF_ALU64; +++ const s8 *rd, *rs; +++ s8 rd_lo, rt, rm, rn; +++ s32 jmp_offset; +++ +++#define check_imm(bits, imm) do { \ +++ if ((imm) >= (1 << ((bits) - 1)) || \ +++ (imm) < -(1 << ((bits) - 1))) { \ +++ pr_info("[%2d] imm=%d(0x%x) out of range\n", \ +++ i, imm, imm); \ +++ return -EINVAL; \ +++ } \ +++} while (0) +++#define check_imm24(imm) check_imm(24, imm) +++ +++ switch (code) { +++ /* ALU operations */ +++ +++ /* dst = src */ +++ case BPF_ALU | BPF_MOV | BPF_K: +++ case BPF_ALU | BPF_MOV | BPF_X: +++ case BPF_ALU64 | BPF_MOV | BPF_K: +++ case BPF_ALU64 | BPF_MOV | BPF_X: +++ switch (BPF_SRC(code)) { +++ case BPF_X: +++ if (imm == 1) { +++ /* Special mov32 for zext */ +++ emit_a32_mov_i(dst_hi, 0, ctx); ++ break; ++ } ++- emit_mov_i(r_scratch, k, ctx); ++- emit_udivmod(r_A, r_A, r_scratch, ctx, BPF_MOD); +++ emit_a32_mov_r64(is64, dst, src, ctx); ++ break; ++- case BPF_ALU | BPF_MOD | BPF_X: ++- update_on_xread(ctx); ++- emit(ARM_CMP_I(r_X, 0), ctx); ++- emit_err_ret(ARM_COND_EQ, ctx); ++- emit_udivmod(r_A, r_A, r_X, ctx, BPF_MOD); ++- break; ++- case BPF_ALU | BPF_OR | BPF_K: ++- /* A |= K */ ++- OP_IMM3(ARM_ORR, r_A, r_A, k, ctx); ++- break; ++- case BPF_ALU | BPF_OR | BPF_X: ++- update_on_xread(ctx); ++- emit(ARM_ORR_R(r_A, r_A, r_X), ctx); ++- break; ++- case BPF_ALU | BPF_XOR | BPF_K: ++- /* A ^= K; */ ++- OP_IMM3(ARM_EOR, r_A, r_A, k, ctx); ++- break; ++- case BPF_ANC | SKF_AD_ALU_XOR_X: ++- case BPF_ALU | BPF_XOR | BPF_X: ++- /* A ^= X */ ++- update_on_xread(ctx); ++- emit(ARM_EOR_R(r_A, r_A, r_X), ctx); ++- break; ++- case BPF_ALU | BPF_AND | BPF_K: ++- /* A &= K */ ++- OP_IMM3(ARM_AND, r_A, r_A, k, ctx); ++- break; ++- case BPF_ALU | BPF_AND | BPF_X: ++- update_on_xread(ctx); ++- emit(ARM_AND_R(r_A, r_A, r_X), ctx); ++- break; ++- case BPF_ALU | BPF_LSH | BPF_K: ++- if (unlikely(k > 31)) ++- return -1; ++- emit(ARM_LSL_I(r_A, r_A, k), ctx); ++- break; ++- case BPF_ALU | BPF_LSH | BPF_X: ++- update_on_xread(ctx); ++- emit(ARM_LSL_R(r_A, r_A, r_X), ctx); ++- break; ++- case BPF_ALU | BPF_RSH | BPF_K: ++- if (unlikely(k > 31)) ++- return -1; ++- if (k) ++- emit(ARM_LSR_I(r_A, r_A, k), ctx); ++- break; ++- case BPF_ALU | BPF_RSH | BPF_X: ++- update_on_xread(ctx); ++- emit(ARM_LSR_R(r_A, r_A, r_X), ctx); ++- break; ++- case BPF_ALU | BPF_NEG: ++- /* A = -A */ ++- emit(ARM_RSB_I(r_A, r_A, 0), ctx); ++- break; ++- case BPF_JMP | BPF_JA: ++- /* pc += K */ ++- emit(ARM_B(b_imm(i + k + 1, ctx)), ctx); ++- break; ++- case BPF_JMP | BPF_JEQ | BPF_K: ++- /* pc += (A == K) ? pc->jt : pc->jf */ ++- condt = ARM_COND_EQ; ++- goto cmp_imm; ++- case BPF_JMP | BPF_JGT | BPF_K: ++- /* pc += (A > K) ? pc->jt : pc->jf */ ++- condt = ARM_COND_HI; ++- goto cmp_imm; ++- case BPF_JMP | BPF_JGE | BPF_K: ++- /* pc += (A >= K) ? 
pc->jt : pc->jf */ ++- condt = ARM_COND_HS; ++-cmp_imm: ++- imm12 = imm8m(k); ++- if (imm12 < 0) { ++- emit_mov_i_no8m(r_scratch, k, ctx); ++- emit(ARM_CMP_R(r_A, r_scratch), ctx); ++- } else { ++- emit(ARM_CMP_I(r_A, imm12), ctx); ++- } ++-cond_jump: ++- if (inst->jt) ++- _emit(condt, ARM_B(b_imm(i + inst->jt + 1, ++- ctx)), ctx); ++- if (inst->jf) ++- _emit(condt ^ 1, ARM_B(b_imm(i + inst->jf + 1, ++- ctx)), ctx); ++- break; ++- case BPF_JMP | BPF_JEQ | BPF_X: ++- /* pc += (A == X) ? pc->jt : pc->jf */ ++- condt = ARM_COND_EQ; ++- goto cmp_x; ++- case BPF_JMP | BPF_JGT | BPF_X: ++- /* pc += (A > X) ? pc->jt : pc->jf */ ++- condt = ARM_COND_HI; ++- goto cmp_x; ++- case BPF_JMP | BPF_JGE | BPF_X: ++- /* pc += (A >= X) ? pc->jt : pc->jf */ ++- condt = ARM_COND_CS; ++-cmp_x: ++- update_on_xread(ctx); ++- emit(ARM_CMP_R(r_A, r_X), ctx); ++- goto cond_jump; ++- case BPF_JMP | BPF_JSET | BPF_K: ++- /* pc += (A & K) ? pc->jt : pc->jf */ ++- condt = ARM_COND_NE; ++- /* not set iff all zeroes iff Z==1 iff EQ */ ++- ++- imm12 = imm8m(k); ++- if (imm12 < 0) { ++- emit_mov_i_no8m(r_scratch, k, ctx); ++- emit(ARM_TST_R(r_A, r_scratch), ctx); ++- } else { ++- emit(ARM_TST_I(r_A, imm12), ctx); ++- } ++- goto cond_jump; ++- case BPF_JMP | BPF_JSET | BPF_X: ++- /* pc += (A & X) ? pc->jt : pc->jf */ ++- update_on_xread(ctx); ++- condt = ARM_COND_NE; ++- emit(ARM_TST_R(r_A, r_X), ctx); ++- goto cond_jump; ++- case BPF_RET | BPF_A: ++- emit(ARM_MOV_R(ARM_R0, r_A), ctx); ++- goto b_epilogue; ++- case BPF_RET | BPF_K: ++- if ((k == 0) && (ctx->ret0_fp_idx < 0)) ++- ctx->ret0_fp_idx = i; ++- emit_mov_i(ARM_R0, k, ctx); ++-b_epilogue: ++- if (i != ctx->skf->len - 1) ++- emit(ARM_B(b_imm(prog->len, ctx)), ctx); ++- break; ++- case BPF_MISC | BPF_TAX: ++- /* X = A */ ++- ctx->seen |= SEEN_X; ++- emit(ARM_MOV_R(r_X, r_A), ctx); ++- break; ++- case BPF_MISC | BPF_TXA: ++- /* A = X */ ++- update_on_xread(ctx); ++- emit(ARM_MOV_R(r_A, r_X), ctx); ++- break; ++- case BPF_ANC | SKF_AD_PROTOCOL: ++- /* A = ntohs(skb->protocol) */ ++- ctx->seen |= SEEN_SKB; ++- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, ++- protocol) != 2); ++- off = offsetof(struct sk_buff, protocol); ++- emit(ARM_LDRH_I(r_scratch, r_skb, off), ctx); ++- emit_swap16(r_A, r_scratch, ctx); ++- break; ++- case BPF_ANC | SKF_AD_CPU: ++- /* r_scratch = current_thread_info() */ ++- OP_IMM3(ARM_BIC, r_scratch, ARM_SP, THREAD_SIZE - 1, ctx); ++- /* A = current_thread_info()->cpu */ ++- BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, cpu) != 4); ++- off = offsetof(struct thread_info, cpu); ++- emit(ARM_LDR_I(r_A, r_scratch, off), ctx); ++- break; ++- case BPF_ANC | SKF_AD_IFINDEX: ++- case BPF_ANC | SKF_AD_HATYPE: ++- /* A = skb->dev->ifindex */ ++- /* A = skb->dev->type */ ++- ctx->seen |= SEEN_SKB; ++- off = offsetof(struct sk_buff, dev); ++- emit(ARM_LDR_I(r_scratch, r_skb, off), ctx); ++- ++- emit(ARM_CMP_I(r_scratch, 0), ctx); ++- emit_err_ret(ARM_COND_EQ, ctx); ++- ++- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ++- ifindex) != 4); ++- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ++- type) != 2); ++- ++- if (code == (BPF_ANC | SKF_AD_IFINDEX)) { ++- off = offsetof(struct net_device, ifindex); ++- emit(ARM_LDR_I(r_A, r_scratch, off), ctx); ++- } else { ++- /* ++- * offset of field "type" in "struct ++- * net_device" is above what can be ++- * used in the ldrh rd, [rn, #imm] ++- * instruction, so load the offset in ++- * a register and use ldrh rd, [rn, rm] ++- */ ++- off = offsetof(struct net_device, type); ++- emit_mov_i(ARM_R3, off, ctx); ++- 
emit(ARM_LDRH_R(r_A, r_scratch, ARM_R3), ctx); ++- } +++ case BPF_K: +++ /* Sign-extend immediate value to destination reg */ +++ emit_a32_mov_se_i64(is64, dst, imm, ctx); ++ break; ++- case BPF_ANC | SKF_AD_MARK: ++- ctx->seen |= SEEN_SKB; ++- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); ++- off = offsetof(struct sk_buff, mark); ++- emit(ARM_LDR_I(r_A, r_skb, off), ctx); ++- break; ++- case BPF_ANC | SKF_AD_RXHASH: ++- ctx->seen |= SEEN_SKB; ++- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); ++- off = offsetof(struct sk_buff, hash); ++- emit(ARM_LDR_I(r_A, r_skb, off), ctx); ++- break; ++- case BPF_ANC | SKF_AD_VLAN_TAG: ++- case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: ++- ctx->seen |= SEEN_SKB; ++- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); ++- off = offsetof(struct sk_buff, vlan_tci); ++- emit(ARM_LDRH_I(r_A, r_skb, off), ctx); ++- if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) ++- OP_IMM3(ARM_AND, r_A, r_A, ~VLAN_TAG_PRESENT, ctx); ++- else { ++- OP_IMM3(ARM_LSR, r_A, r_A, 12, ctx); ++- OP_IMM3(ARM_AND, r_A, r_A, 0x1, ctx); ++- } +++ } +++ break; +++ /* dst = dst + src/imm */ +++ /* dst = dst - src/imm */ +++ /* dst = dst | src/imm */ +++ /* dst = dst & src/imm */ +++ /* dst = dst ^ src/imm */ +++ /* dst = dst * src/imm */ +++ /* dst = dst << src */ +++ /* dst = dst >> src */ +++ case BPF_ALU | BPF_ADD | BPF_K: +++ case BPF_ALU | BPF_ADD | BPF_X: +++ case BPF_ALU | BPF_SUB | BPF_K: +++ case BPF_ALU | BPF_SUB | BPF_X: +++ case BPF_ALU | BPF_OR | BPF_K: +++ case BPF_ALU | BPF_OR | BPF_X: +++ case BPF_ALU | BPF_AND | BPF_K: +++ case BPF_ALU | BPF_AND | BPF_X: +++ case BPF_ALU | BPF_XOR | BPF_K: +++ case BPF_ALU | BPF_XOR | BPF_X: +++ case BPF_ALU | BPF_MUL | BPF_K: +++ case BPF_ALU | BPF_MUL | BPF_X: +++ case BPF_ALU | BPF_LSH | BPF_X: +++ case BPF_ALU | BPF_RSH | BPF_X: +++ case BPF_ALU | BPF_ARSH | BPF_K: +++ case BPF_ALU | BPF_ARSH | BPF_X: +++ case BPF_ALU64 | BPF_ADD | BPF_K: +++ case BPF_ALU64 | BPF_ADD | BPF_X: +++ case BPF_ALU64 | BPF_SUB | BPF_K: +++ case BPF_ALU64 | BPF_SUB | BPF_X: +++ case BPF_ALU64 | BPF_OR | BPF_K: +++ case BPF_ALU64 | BPF_OR | BPF_X: +++ case BPF_ALU64 | BPF_AND | BPF_K: +++ case BPF_ALU64 | BPF_AND | BPF_X: +++ case BPF_ALU64 | BPF_XOR | BPF_K: +++ case BPF_ALU64 | BPF_XOR | BPF_X: +++ switch (BPF_SRC(code)) { +++ case BPF_X: +++ emit_a32_alu_r64(is64, dst, src, ctx, BPF_OP(code)); +++ break; +++ case BPF_K: +++ /* Move immediate value to the temporary register +++ * and then do the ALU operation on the temporary +++ * register as this will sign-extend the immediate +++ * value into temporary reg and then it would be +++ * safe to do the operation on it. 
+++ */ +++ emit_a32_mov_se_i64(is64, tmp2, imm, ctx); +++ emit_a32_alu_r64(is64, dst, tmp2, ctx, BPF_OP(code)); ++ break; ++- case BPF_ANC | SKF_AD_PKTTYPE: ++- ctx->seen |= SEEN_SKB; ++- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, ++- __pkt_type_offset[0]) != 1); ++- off = PKT_TYPE_OFFSET(); ++- emit(ARM_LDRB_I(r_A, r_skb, off), ctx); ++- emit(ARM_AND_I(r_A, r_A, PKT_TYPE_MAX), ctx); ++-#ifdef __BIG_ENDIAN_BITFIELD ++- emit(ARM_LSR_I(r_A, r_A, 5), ctx); ++-#endif +++ } +++ break; +++ /* dst = dst / src(imm) */ +++ /* dst = dst % src(imm) */ +++ case BPF_ALU | BPF_DIV | BPF_K: +++ case BPF_ALU | BPF_DIV | BPF_X: +++ case BPF_ALU | BPF_MOD | BPF_K: +++ case BPF_ALU | BPF_MOD | BPF_X: +++ rd_lo = arm_bpf_get_reg32(dst_lo, tmp2[1], ctx); +++ switch (BPF_SRC(code)) { +++ case BPF_X: +++ rt = arm_bpf_get_reg32(src_lo, tmp2[0], ctx); +++ break; +++ case BPF_K: +++ rt = tmp2[0]; +++ emit_a32_mov_i(rt, imm, ctx); ++ break; ++- case BPF_ANC | SKF_AD_QUEUE: ++- ctx->seen |= SEEN_SKB; ++- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, ++- queue_mapping) != 2); ++- BUILD_BUG_ON(offsetof(struct sk_buff, ++- queue_mapping) > 0xff); ++- off = offsetof(struct sk_buff, queue_mapping); ++- emit(ARM_LDRH_I(r_A, r_skb, off), ctx); ++- break; ++- case BPF_ANC | SKF_AD_PAY_OFFSET: ++- ctx->seen |= SEEN_SKB | SEEN_CALL; ++- ++- emit(ARM_MOV_R(ARM_R0, r_skb), ctx); ++- emit_mov_i(ARM_R3, (unsigned int)skb_get_poff, ctx); ++- emit_blx_r(ARM_R3, ctx); ++- emit(ARM_MOV_R(r_A, ARM_R0), ctx); ++- break; ++- case BPF_LDX | BPF_W | BPF_ABS: ++- /* ++- * load a 32bit word from struct seccomp_data. ++- * seccomp_check_filter() will already have checked ++- * that k is 32bit aligned and lies within the ++- * struct seccomp_data. +++ default: +++ rt = src_lo; +++ break; +++ } +++ emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code)); +++ arm_bpf_put_reg32(dst_lo, rd_lo, ctx); +++ if (!ctx->prog->aux->verifier_zext) +++ emit_a32_mov_i(dst_hi, 0, ctx); +++ break; +++ case BPF_ALU64 | BPF_DIV | BPF_K: +++ case BPF_ALU64 | BPF_DIV | BPF_X: +++ case BPF_ALU64 | BPF_MOD | BPF_K: +++ case BPF_ALU64 | BPF_MOD | BPF_X: +++ goto notyet; +++ /* dst = dst >> imm */ +++ /* dst = dst << imm */ +++ case BPF_ALU | BPF_RSH | BPF_K: +++ case BPF_ALU | BPF_LSH | BPF_K: +++ if (unlikely(imm > 31)) +++ return -EINVAL; +++ if (imm) +++ emit_a32_alu_i(dst_lo, imm, ctx, BPF_OP(code)); +++ if (!ctx->prog->aux->verifier_zext) +++ emit_a32_mov_i(dst_hi, 0, ctx); +++ break; +++ /* dst = dst << imm */ +++ case BPF_ALU64 | BPF_LSH | BPF_K: +++ if (unlikely(imm > 63)) +++ return -EINVAL; +++ emit_a32_lsh_i64(dst, imm, ctx); +++ break; +++ /* dst = dst >> imm */ +++ case BPF_ALU64 | BPF_RSH | BPF_K: +++ if (unlikely(imm > 63)) +++ return -EINVAL; +++ emit_a32_rsh_i64(dst, imm, ctx); +++ break; +++ /* dst = dst << src */ +++ case BPF_ALU64 | BPF_LSH | BPF_X: +++ emit_a32_lsh_r64(dst, src, ctx); +++ break; +++ /* dst = dst >> src */ +++ case BPF_ALU64 | BPF_RSH | BPF_X: +++ emit_a32_rsh_r64(dst, src, ctx); +++ break; +++ /* dst = dst >> src (signed) */ +++ case BPF_ALU64 | BPF_ARSH | BPF_X: +++ emit_a32_arsh_r64(dst, src, ctx); +++ break; +++ /* dst = dst >> imm (signed) */ +++ case BPF_ALU64 | BPF_ARSH | BPF_K: +++ if (unlikely(imm > 63)) +++ return -EINVAL; +++ emit_a32_arsh_i64(dst, imm, ctx); +++ break; +++ /* dst = ~dst */ +++ case BPF_ALU | BPF_NEG: +++ emit_a32_alu_i(dst_lo, 0, ctx, BPF_OP(code)); +++ if (!ctx->prog->aux->verifier_zext) +++ emit_a32_mov_i(dst_hi, 0, ctx); +++ break; +++ /* dst = ~dst (64 bit) */ +++ case BPF_ALU64 | BPF_NEG: +++ 
emit_a32_neg64(dst, ctx); +++ break; +++ /* dst = dst * src/imm */ +++ case BPF_ALU64 | BPF_MUL | BPF_X: +++ case BPF_ALU64 | BPF_MUL | BPF_K: +++ switch (BPF_SRC(code)) { +++ case BPF_X: +++ emit_a32_mul_r64(dst, src, ctx); +++ break; +++ case BPF_K: +++ /* Move immediate value to the temporary register +++ * and then do the multiplication on it as this +++ * will sign-extend the immediate value into temp +++ * reg then it would be safe to do the operation +++ * on it. ++ */ ++- ctx->seen |= SEEN_SKB; ++- emit(ARM_LDR_I(r_A, r_skb, k), ctx); +++ emit_a32_mov_se_i64(is64, tmp2, imm, ctx); +++ emit_a32_mul_r64(dst, tmp2, ctx); +++ break; +++ } +++ break; +++ /* dst = htole(dst) */ +++ /* dst = htobe(dst) */ +++ case BPF_ALU | BPF_END | BPF_FROM_LE: +++ case BPF_ALU | BPF_END | BPF_FROM_BE: +++ rd = arm_bpf_get_reg64(dst, tmp, ctx); +++ if (BPF_SRC(code) == BPF_FROM_LE) +++ goto emit_bswap_uxt; +++ switch (imm) { +++ case 16: +++ emit_rev16(rd[1], rd[1], ctx); +++ goto emit_bswap_uxt; +++ case 32: +++ emit_rev32(rd[1], rd[1], ctx); +++ goto emit_bswap_uxt; +++ case 64: +++ emit_rev32(ARM_LR, rd[1], ctx); +++ emit_rev32(rd[1], rd[0], ctx); +++ emit(ARM_MOV_R(rd[0], ARM_LR), ctx); ++ break; ++- default: ++- return -1; ++ } +++ goto exit; +++emit_bswap_uxt: +++ switch (imm) { +++ case 16: +++ /* zero-extend 16 bits into 64 bits */ +++#if __LINUX_ARM_ARCH__ < 6 +++ emit_a32_mov_i(tmp2[1], 0xffff, ctx); +++ emit(ARM_AND_R(rd[1], rd[1], tmp2[1]), ctx); +++#else /* ARMv6+ */ +++ emit(ARM_UXTH(rd[1], rd[1]), ctx); +++#endif +++ if (!ctx->prog->aux->verifier_zext) +++ emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); +++ break; +++ case 32: +++ /* zero-extend 32 bits into 64 bits */ +++ if (!ctx->prog->aux->verifier_zext) +++ emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); +++ break; +++ case 64: +++ /* nop */ +++ break; +++ } +++exit: +++ arm_bpf_put_reg64(dst, rd, ctx); +++ break; +++ /* dst = imm64 */ +++ case BPF_LD | BPF_IMM | BPF_DW: +++ { +++ u64 val = (u32)imm | (u64)insn[1].imm << 32; ++ ++- if (ctx->flags & FLAG_IMM_OVERFLOW) ++- /* ++- * this instruction generated an overflow when ++- * trying to access the literal pool, so ++- * delegate this filter to the kernel interpreter. 
++- */ ++- return -1; +++ emit_a32_mov_i64(dst, val, ctx); +++ +++ return 1; ++ } +++ /* LDX: dst = *(size *)(src + off) */ +++ case BPF_LDX | BPF_MEM | BPF_W: +++ case BPF_LDX | BPF_MEM | BPF_H: +++ case BPF_LDX | BPF_MEM | BPF_B: +++ case BPF_LDX | BPF_MEM | BPF_DW: +++ rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); +++ emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); +++ break; +++ /* ST: *(size *)(dst + off) = imm */ +++ case BPF_ST | BPF_MEM | BPF_W: +++ case BPF_ST | BPF_MEM | BPF_H: +++ case BPF_ST | BPF_MEM | BPF_B: +++ case BPF_ST | BPF_MEM | BPF_DW: +++ switch (BPF_SIZE(code)) { +++ case BPF_DW: +++ /* Sign-extend immediate value into temp reg */ +++ emit_a32_mov_se_i64(true, tmp2, imm, ctx); +++ break; +++ case BPF_W: +++ case BPF_H: +++ case BPF_B: +++ emit_a32_mov_i(tmp2[1], imm, ctx); +++ break; +++ } +++ emit_str_r(dst_lo, tmp2, off, ctx, BPF_SIZE(code)); +++ break; +++ /* STX XADD: lock *(u32 *)(dst + off) += src */ +++ case BPF_STX | BPF_XADD | BPF_W: +++ /* STX XADD: lock *(u64 *)(dst + off) += src */ +++ case BPF_STX | BPF_XADD | BPF_DW: +++ goto notyet; +++ /* STX: *(size *)(dst + off) = src */ +++ case BPF_STX | BPF_MEM | BPF_W: +++ case BPF_STX | BPF_MEM | BPF_H: +++ case BPF_STX | BPF_MEM | BPF_B: +++ case BPF_STX | BPF_MEM | BPF_DW: +++ rs = arm_bpf_get_reg64(src, tmp2, ctx); +++ emit_str_r(dst_lo, rs, off, ctx, BPF_SIZE(code)); +++ break; +++ /* PC += off if dst == src */ +++ /* PC += off if dst > src */ +++ /* PC += off if dst >= src */ +++ /* PC += off if dst < src */ +++ /* PC += off if dst <= src */ +++ /* PC += off if dst != src */ +++ /* PC += off if dst > src (signed) */ +++ /* PC += off if dst >= src (signed) */ +++ /* PC += off if dst < src (signed) */ +++ /* PC += off if dst <= src (signed) */ +++ /* PC += off if dst & src */ +++ case BPF_JMP | BPF_JEQ | BPF_X: +++ case BPF_JMP | BPF_JGT | BPF_X: +++ case BPF_JMP | BPF_JGE | BPF_X: +++ case BPF_JMP | BPF_JNE | BPF_X: +++ case BPF_JMP | BPF_JSGT | BPF_X: +++ case BPF_JMP | BPF_JSGE | BPF_X: +++ case BPF_JMP | BPF_JSET | BPF_X: +++ case BPF_JMP | BPF_JLE | BPF_X: +++ case BPF_JMP | BPF_JLT | BPF_X: +++ case BPF_JMP | BPF_JSLT | BPF_X: +++ case BPF_JMP | BPF_JSLE | BPF_X: +++ case BPF_JMP32 | BPF_JEQ | BPF_X: +++ case BPF_JMP32 | BPF_JGT | BPF_X: +++ case BPF_JMP32 | BPF_JGE | BPF_X: +++ case BPF_JMP32 | BPF_JNE | BPF_X: +++ case BPF_JMP32 | BPF_JSGT | BPF_X: +++ case BPF_JMP32 | BPF_JSGE | BPF_X: +++ case BPF_JMP32 | BPF_JSET | BPF_X: +++ case BPF_JMP32 | BPF_JLE | BPF_X: +++ case BPF_JMP32 | BPF_JLT | BPF_X: +++ case BPF_JMP32 | BPF_JSLT | BPF_X: +++ case BPF_JMP32 | BPF_JSLE | BPF_X: +++ /* Setup source registers */ +++ rm = arm_bpf_get_reg32(src_hi, tmp2[0], ctx); +++ rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); +++ goto go_jmp; +++ /* PC += off if dst == imm */ +++ /* PC += off if dst > imm */ +++ /* PC += off if dst >= imm */ +++ /* PC += off if dst < imm */ +++ /* PC += off if dst <= imm */ +++ /* PC += off if dst != imm */ +++ /* PC += off if dst > imm (signed) */ +++ /* PC += off if dst >= imm (signed) */ +++ /* PC += off if dst < imm (signed) */ +++ /* PC += off if dst <= imm (signed) */ +++ /* PC += off if dst & imm */ +++ case BPF_JMP | BPF_JEQ | BPF_K: +++ case BPF_JMP | BPF_JGT | BPF_K: +++ case BPF_JMP | BPF_JGE | BPF_K: +++ case BPF_JMP | BPF_JNE | BPF_K: +++ case BPF_JMP | BPF_JSGT | BPF_K: +++ case BPF_JMP | BPF_JSGE | BPF_K: +++ case BPF_JMP | BPF_JSET | BPF_K: +++ case BPF_JMP | BPF_JLT | BPF_K: +++ case BPF_JMP | BPF_JLE | BPF_K: +++ case BPF_JMP | BPF_JSLT | BPF_K: +++ case BPF_JMP | 
BPF_JSLE | BPF_K: +++ case BPF_JMP32 | BPF_JEQ | BPF_K: +++ case BPF_JMP32 | BPF_JGT | BPF_K: +++ case BPF_JMP32 | BPF_JGE | BPF_K: +++ case BPF_JMP32 | BPF_JNE | BPF_K: +++ case BPF_JMP32 | BPF_JSGT | BPF_K: +++ case BPF_JMP32 | BPF_JSGE | BPF_K: +++ case BPF_JMP32 | BPF_JSET | BPF_K: +++ case BPF_JMP32 | BPF_JLT | BPF_K: +++ case BPF_JMP32 | BPF_JLE | BPF_K: +++ case BPF_JMP32 | BPF_JSLT | BPF_K: +++ case BPF_JMP32 | BPF_JSLE | BPF_K: +++ if (off == 0) +++ break; +++ rm = tmp2[0]; +++ rn = tmp2[1]; +++ /* Sign-extend immediate value */ +++ emit_a32_mov_se_i64(true, tmp2, imm, ctx); +++go_jmp: +++ /* Setup destination register */ +++ rd = arm_bpf_get_reg64(dst, tmp, ctx); +++ +++ /* Check for the condition */ +++ emit_ar_r(rd[0], rd[1], rm, rn, ctx, BPF_OP(code), +++ BPF_CLASS(code) == BPF_JMP); +++ +++ /* Setup JUMP instruction */ +++ jmp_offset = bpf2a32_offset(i+off, i, ctx); +++ switch (BPF_OP(code)) { +++ case BPF_JNE: +++ case BPF_JSET: +++ _emit(ARM_COND_NE, ARM_B(jmp_offset), ctx); +++ break; +++ case BPF_JEQ: +++ _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); +++ break; +++ case BPF_JGT: +++ _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx); +++ break; +++ case BPF_JGE: +++ _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx); +++ break; +++ case BPF_JSGT: +++ _emit(ARM_COND_LT, ARM_B(jmp_offset), ctx); +++ break; +++ case BPF_JSGE: +++ _emit(ARM_COND_GE, ARM_B(jmp_offset), ctx); +++ break; +++ case BPF_JLE: +++ _emit(ARM_COND_LS, ARM_B(jmp_offset), ctx); +++ break; +++ case BPF_JLT: +++ _emit(ARM_COND_CC, ARM_B(jmp_offset), ctx); +++ break; +++ case BPF_JSLT: +++ _emit(ARM_COND_LT, ARM_B(jmp_offset), ctx); +++ break; +++ case BPF_JSLE: +++ _emit(ARM_COND_GE, ARM_B(jmp_offset), ctx); +++ break; +++ } +++ break; +++ /* JMP OFF */ +++ case BPF_JMP | BPF_JA: +++ { +++ if (off == 0) +++ break; +++ jmp_offset = bpf2a32_offset(i+off, i, ctx); +++ check_imm24(jmp_offset); +++ emit(ARM_B(jmp_offset), ctx); +++ break; +++ } +++ /* tail call */ +++ case BPF_JMP | BPF_TAIL_CALL: +++ if (emit_bpf_tail_call(ctx)) +++ return -EFAULT; +++ break; +++ /* function call */ +++ case BPF_JMP | BPF_CALL: +++ { +++ const s8 *r0 = bpf2a32[BPF_REG_0]; +++ const s8 *r1 = bpf2a32[BPF_REG_1]; +++ const s8 *r2 = bpf2a32[BPF_REG_2]; +++ const s8 *r3 = bpf2a32[BPF_REG_3]; +++ const s8 *r4 = bpf2a32[BPF_REG_4]; +++ const s8 *r5 = bpf2a32[BPF_REG_5]; +++ const u32 func = (u32)__bpf_call_base + (u32)imm; +++ +++ emit_a32_mov_r64(true, r0, r1, ctx); +++ emit_a32_mov_r64(true, r1, r2, ctx); +++ emit_push_r64(r5, ctx); +++ emit_push_r64(r4, ctx); +++ emit_push_r64(r3, ctx); ++ ++- /* compute offsets only during the first pass */ ++- if (ctx->target == NULL) ++- ctx->offsets[i] = ctx->idx * 4; +++ emit_a32_mov_i(tmp[1], func, ctx); +++ emit_blx_r(tmp[1], ctx); ++ +++ emit(ARM_ADD_I(ARM_SP, ARM_SP, imm8m(24)), ctx); // callee clean +++ break; +++ } +++ /* function return */ +++ case BPF_JMP | BPF_EXIT: +++ /* Optimization: when last instruction is EXIT +++ * simply fallthrough to epilogue. +++ */ +++ if (i == ctx->prog->len - 1) +++ break; +++ jmp_offset = epilogue_offset(ctx); +++ check_imm24(jmp_offset); +++ emit(ARM_B(jmp_offset), ctx); +++ break; +++notyet: +++ pr_info_once("*** NOT YET: opcode %02x ***\n", code); +++ return -EFAULT; +++ default: +++ pr_err_once("unknown opcode %02x\n", code); +++ return -EINVAL; +++ } +++ +++ if (ctx->flags & FLAG_IMM_OVERFLOW) +++ /* +++ * this instruction generated an overflow when +++ * trying to access the literal pool, so +++ * delegate this filter to the kernel interpreter. 
+++ */ +++ return -1; ++ return 0; ++ } ++ +++static int build_body(struct jit_ctx *ctx) +++{ +++ const struct bpf_prog *prog = ctx->prog; +++ unsigned int i; +++ +++ for (i = 0; i < prog->len; i++) { +++ const struct bpf_insn *insn = &(prog->insnsi[i]); +++ int ret; +++ +++ ret = build_insn(insn, ctx); +++ +++ /* It's used with loading the 64 bit immediate value. */ +++ if (ret > 0) { +++ i++; +++ if (ctx->target == NULL) +++ ctx->offsets[i] = ctx->idx; +++ continue; +++ } +++ +++ if (ctx->target == NULL) +++ ctx->offsets[i] = ctx->idx; +++ +++ /* If unsuccesfull, return with error code */ +++ if (ret) +++ return ret; +++ } +++ return 0; +++} +++ +++static int validate_code(struct jit_ctx *ctx) +++{ +++ int i; +++ +++ for (i = 0; i < ctx->idx; i++) { +++ if (ctx->target[i] == __opcode_to_mem_arm(ARM_INST_UDF)) +++ return -1; +++ } +++ +++ return 0; +++} ++ ++-void bpf_jit_compile(struct bpf_prog *fp) +++void bpf_jit_compile(struct bpf_prog *prog) ++ { +++ /* Nothing to do here. We support Internal BPF. */ +++} +++ +++bool bpf_jit_needs_zext(void) +++{ +++ return true; +++} +++ +++struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +++{ +++ struct bpf_prog *tmp, *orig_prog = prog; ++ struct bpf_binary_header *header; +++ bool tmp_blinded = false; ++ struct jit_ctx ctx; ++- unsigned tmp_idx; ++- unsigned alloc_size; ++- u8 *target_ptr; +++ unsigned int tmp_idx; +++ unsigned int image_size; +++ u8 *image_ptr; ++ ++- if (!bpf_jit_enable) ++- return; +++ /* If BPF JIT was not enabled then we must fall back to +++ * the interpreter. +++ */ +++ if (!prog->jit_requested) +++ return orig_prog; ++ ++- memset(&ctx, 0, sizeof(ctx)); ++- ctx.skf = fp; ++- ctx.ret0_fp_idx = -1; +++ /* If constant blinding was enabled and we failed during blinding +++ * then we must fall back to the interpreter. Otherwise, we save +++ * the new JITed code. +++ */ +++ tmp = bpf_jit_blind_constants(prog); ++ ++- ctx.offsets = kzalloc(4 * (ctx.skf->len + 1), GFP_KERNEL); ++- if (ctx.offsets == NULL) ++- return; +++ if (IS_ERR(tmp)) +++ return orig_prog; +++ if (tmp != prog) { +++ tmp_blinded = true; +++ prog = tmp; +++ } ++ ++- /* fake pass to fill in the ctx->seen */ ++- if (unlikely(build_body(&ctx))) +++ memset(&ctx, 0, sizeof(ctx)); +++ ctx.prog = prog; +++ ctx.cpu_architecture = cpu_architecture(); +++ +++ /* Not able to allocate memory for offsets[] , then +++ * we must fall back to the interpreter +++ */ +++ ctx.offsets = kcalloc(prog->len, sizeof(int), GFP_KERNEL); +++ if (ctx.offsets == NULL) { +++ prog = orig_prog; ++ goto out; +++ } +++ +++ /* 1) fake pass to find in the length of the JITed code, +++ * to compute ctx->offsets and other context variables +++ * needed to compute final JITed code. +++ * Also, calculate random starting pointer/start of JITed code +++ * which is prefixed by random number of fault instructions. +++ * +++ * If the first pass fails then there is no chance of it +++ * being successful in the second pass, so just fall back +++ * to the interpreter. 
+++ */ +++ if (build_body(&ctx)) { +++ prog = orig_prog; +++ goto out_off; +++ } ++ ++ tmp_idx = ctx.idx; ++ build_prologue(&ctx); ++ ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4; ++ +++ ctx.epilogue_offset = ctx.idx; +++ ++ #if __LINUX_ARM_ARCH__ < 7 ++ tmp_idx = ctx.idx; ++ build_epilogue(&ctx); ++@@ -1020,64 +1941,83 @@ void bpf_jit_compile(struct bpf_prog *fp ++ ++ ctx.idx += ctx.imm_count; ++ if (ctx.imm_count) { ++- ctx.imms = kzalloc(4 * ctx.imm_count, GFP_KERNEL); ++- if (ctx.imms == NULL) ++- goto out; +++ ctx.imms = kcalloc(ctx.imm_count, sizeof(u32), GFP_KERNEL); +++ if (ctx.imms == NULL) { +++ prog = orig_prog; +++ goto out_off; +++ } ++ } ++ #else ++- /* there's nothing after the epilogue on ARMv7 */ +++ /* there's nothing about the epilogue on ARMv7 */ ++ build_epilogue(&ctx); ++ #endif ++- alloc_size = 4 * ctx.idx; ++- header = bpf_jit_binary_alloc(alloc_size, &target_ptr, ++- 4, jit_fill_hole); ++- if (header == NULL) ++- goto out; +++ /* Now we can get the actual image size of the JITed arm code. +++ * Currently, we are not considering the THUMB-2 instructions +++ * for jit, although it can decrease the size of the image. +++ * +++ * As each arm instruction is of length 32bit, we are translating +++ * number of JITed intructions into the size required to store these +++ * JITed code. +++ */ +++ image_size = sizeof(u32) * ctx.idx; ++ ++- ctx.target = (u32 *) target_ptr; +++ /* Now we know the size of the structure to make */ +++ header = bpf_jit_binary_alloc(image_size, &image_ptr, +++ sizeof(u32), jit_fill_hole); +++ /* Not able to allocate memory for the structure then +++ * we must fall back to the interpretation +++ */ +++ if (header == NULL) { +++ prog = orig_prog; +++ goto out_imms; +++ } +++ +++ /* 2.) Actual pass to generate final JIT code */ +++ ctx.target = (u32 *) image_ptr; ++ ctx.idx = 0; ++ ++ build_prologue(&ctx); +++ +++ /* If building the body of the JITed code fails somehow, +++ * we fall back to the interpretation. +++ */ ++ if (build_body(&ctx) < 0) { ++-#if __LINUX_ARM_ARCH__ < 7 ++- if (ctx.imm_count) ++- kfree(ctx.imms); ++-#endif +++ image_ptr = NULL; ++ bpf_jit_binary_free(header); ++- goto out; +++ prog = orig_prog; +++ goto out_imms; ++ } ++ build_epilogue(&ctx); ++ +++ /* 3.) Extra pass to validate JITed Code */ +++ if (validate_code(&ctx)) { +++ image_ptr = NULL; +++ bpf_jit_binary_free(header); +++ prog = orig_prog; +++ goto out_imms; +++ } ++ flush_icache_range((u32)header, (u32)(ctx.target + ctx.idx)); ++ +++ if (bpf_jit_enable > 1) +++ /* there are 2 passes here */ +++ bpf_jit_dump(prog->len, image_size, 2, ctx.target); +++ +++ bpf_jit_binary_lock_ro(header); +++ prog->bpf_func = (void *)ctx.target; +++ prog->jited = 1; +++ prog->jited_len = image_size; +++ +++out_imms: ++ #if __LINUX_ARM_ARCH__ < 7 ++ if (ctx.imm_count) ++ kfree(ctx.imms); ++ #endif ++- ++- if (bpf_jit_enable > 1) ++- /* there are 2 passes here */ ++- bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); ++- ++- set_memory_ro((unsigned long)header, header->pages); ++- fp->bpf_func = (void *)ctx.target; ++- fp->jited = 1; ++-out: +++out_off: ++ kfree(ctx.offsets); ++- return; +++out: +++ if (tmp_blinded) +++ bpf_jit_prog_release_other(prog, prog == orig_prog ? 
+++ tmp : orig_prog); +++ return prog; ++ } ++ ++-void bpf_jit_free(struct bpf_prog *fp) ++-{ ++- unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; ++- struct bpf_binary_header *header = (void *)addr; ++- ++- if (!fp->jited) ++- goto free_filter; ++- ++- set_memory_rw(addr, header->pages); ++- bpf_jit_binary_free(header); ++- ++-free_filter: ++- bpf_prog_unlock_free(fp); ++-} ++--- a/arch/arm/net/bpf_jit_32.h +++++ b/arch/arm/net/bpf_jit_32.h ++@@ -1,16 +1,14 @@ +++/* SPDX-License-Identifier: GPL-2.0-only */ ++ /* ++ * Just-In-Time compiler for BPF filters on 32bit ARM ++ * ++ * Copyright (c) 2011 Mircea Gherzan ++- * ++- * This program is free software; you can redistribute it and/or modify it ++- * under the terms of the GNU General Public License as published by the ++- * Free Software Foundation; version 2 of the License. ++ */ ++ ++ #ifndef PFILTER_OPCODES_ARM_H ++ #define PFILTER_OPCODES_ARM_H ++ +++/* ARM 32bit Registers */ ++ #define ARM_R0 0 ++ #define ARM_R1 1 ++ #define ARM_R2 2 ++@@ -22,40 +20,46 @@ ++ #define ARM_R8 8 ++ #define ARM_R9 9 ++ #define ARM_R10 10 ++-#define ARM_FP 11 ++-#define ARM_IP 12 ++-#define ARM_SP 13 ++-#define ARM_LR 14 ++-#define ARM_PC 15 ++- ++-#define ARM_COND_EQ 0x0 ++-#define ARM_COND_NE 0x1 ++-#define ARM_COND_CS 0x2 +++#define ARM_FP 11 /* Frame Pointer */ +++#define ARM_IP 12 /* Intra-procedure scratch register */ +++#define ARM_SP 13 /* Stack pointer: as load/store base reg */ +++#define ARM_LR 14 /* Link Register */ +++#define ARM_PC 15 /* Program counter */ +++ +++#define ARM_COND_EQ 0x0 /* == */ +++#define ARM_COND_NE 0x1 /* != */ +++#define ARM_COND_CS 0x2 /* unsigned >= */ ++ #define ARM_COND_HS ARM_COND_CS ++-#define ARM_COND_CC 0x3 +++#define ARM_COND_CC 0x3 /* unsigned < */ ++ #define ARM_COND_LO ARM_COND_CC ++-#define ARM_COND_MI 0x4 ++-#define ARM_COND_PL 0x5 ++-#define ARM_COND_VS 0x6 ++-#define ARM_COND_VC 0x7 ++-#define ARM_COND_HI 0x8 ++-#define ARM_COND_LS 0x9 ++-#define ARM_COND_GE 0xa ++-#define ARM_COND_LT 0xb ++-#define ARM_COND_GT 0xc ++-#define ARM_COND_LE 0xd ++-#define ARM_COND_AL 0xe +++#define ARM_COND_MI 0x4 /* < 0 */ +++#define ARM_COND_PL 0x5 /* >= 0 */ +++#define ARM_COND_VS 0x6 /* Signed Overflow */ +++#define ARM_COND_VC 0x7 /* No Signed Overflow */ +++#define ARM_COND_HI 0x8 /* unsigned > */ +++#define ARM_COND_LS 0x9 /* unsigned <= */ +++#define ARM_COND_GE 0xa /* Signed >= */ +++#define ARM_COND_LT 0xb /* Signed < */ +++#define ARM_COND_GT 0xc /* Signed > */ +++#define ARM_COND_LE 0xd /* Signed <= */ +++#define ARM_COND_AL 0xe /* None */ ++ ++ /* register shift types */ ++ #define SRTYPE_LSL 0 ++ #define SRTYPE_LSR 1 ++ #define SRTYPE_ASR 2 ++ #define SRTYPE_ROR 3 +++#define SRTYPE_ASL (SRTYPE_LSL) ++ ++ #define ARM_INST_ADD_R 0x00800000 +++#define ARM_INST_ADDS_R 0x00900000 +++#define ARM_INST_ADC_R 0x00a00000 +++#define ARM_INST_ADC_I 0x02a00000 ++ #define ARM_INST_ADD_I 0x02800000 +++#define ARM_INST_ADDS_I 0x02900000 ++ ++ #define ARM_INST_AND_R 0x00000000 +++#define ARM_INST_ANDS_R 0x00100000 ++ #define ARM_INST_AND_I 0x02000000 ++ ++ #define ARM_INST_BIC_R 0x01c00000 ++@@ -71,13 +75,18 @@ ++ #define ARM_INST_EOR_R 0x00200000 ++ #define ARM_INST_EOR_I 0x02200000 ++ ++-#define ARM_INST_LDRB_I 0x05d00000 +++#define ARM_INST_LDST__U 0x00800000 +++#define ARM_INST_LDST__IMM12 0x00000fff +++#define ARM_INST_LDRB_I 0x05500000 ++ #define ARM_INST_LDRB_R 0x07d00000 ++-#define ARM_INST_LDRH_I 0x01d000b0 +++#define ARM_INST_LDRD_I 0x014000d0 +++#define ARM_INST_LDRH_I 0x015000b0 ++ #define ARM_INST_LDRH_R 
0x019000b0 ++-#define ARM_INST_LDR_I 0x05900000 +++#define ARM_INST_LDR_I 0x05100000 +++#define ARM_INST_LDR_R 0x07900000 ++ ++ #define ARM_INST_LDM 0x08900000 +++#define ARM_INST_LDM_IA 0x08b00000 ++ ++ #define ARM_INST_LSL_I 0x01a00000 ++ #define ARM_INST_LSL_R 0x01a00010 ++@@ -86,6 +95,7 @@ ++ #define ARM_INST_LSR_R 0x01a00030 ++ ++ #define ARM_INST_MOV_R 0x01a00000 +++#define ARM_INST_MOVS_R 0x01b00000 ++ #define ARM_INST_MOV_I 0x03a00000 ++ #define ARM_INST_MOVW 0x03000000 ++ #define ARM_INST_MOVT 0x03400000 ++@@ -96,17 +106,29 @@ ++ #define ARM_INST_PUSH 0x092d0000 ++ ++ #define ARM_INST_ORR_R 0x01800000 +++#define ARM_INST_ORRS_R 0x01900000 ++ #define ARM_INST_ORR_I 0x03800000 ++ ++ #define ARM_INST_REV 0x06bf0f30 ++ #define ARM_INST_REV16 0x06bf0fb0 ++ ++ #define ARM_INST_RSB_I 0x02600000 +++#define ARM_INST_RSBS_I 0x02700000 +++#define ARM_INST_RSC_I 0x02e00000 ++ ++ #define ARM_INST_SUB_R 0x00400000 +++#define ARM_INST_SUBS_R 0x00500000 +++#define ARM_INST_RSB_R 0x00600000 ++ #define ARM_INST_SUB_I 0x02400000 ++- ++-#define ARM_INST_STR_I 0x05800000 +++#define ARM_INST_SUBS_I 0x02500000 +++#define ARM_INST_SBC_I 0x02c00000 +++#define ARM_INST_SBC_R 0x00c00000 +++#define ARM_INST_SBCS_R 0x00d00000 +++ +++#define ARM_INST_STR_I 0x05000000 +++#define ARM_INST_STRB_I 0x05400000 +++#define ARM_INST_STRD_I 0x014000f0 +++#define ARM_INST_STRH_I 0x014000b0 ++ ++ #define ARM_INST_TST_R 0x01100000 ++ #define ARM_INST_TST_I 0x03100000 ++@@ -117,6 +139,8 @@ ++ ++ #define ARM_INST_MLS 0x00600090 ++ +++#define ARM_INST_UXTH 0x06ff0070 +++ ++ /* ++ * Use a suitable undefined instruction to use for ARM/Thumb2 faulting. ++ * We need to be careful not to conflict with those used by other modules ++@@ -135,11 +159,18 @@ ++ #define _AL3_R(op, rd, rn, rm) ((op ## _R) | (rd) << 12 | (rn) << 16 | (rm)) ++ /* immediate */ ++ #define _AL3_I(op, rd, rn, imm) ((op ## _I) | (rd) << 12 | (rn) << 16 | (imm)) +++/* register with register-shift */ +++#define _AL3_SR(inst) (inst | (1 << 4)) ++ ++ #define ARM_ADD_R(rd, rn, rm) _AL3_R(ARM_INST_ADD, rd, rn, rm) +++#define ARM_ADDS_R(rd, rn, rm) _AL3_R(ARM_INST_ADDS, rd, rn, rm) ++ #define ARM_ADD_I(rd, rn, imm) _AL3_I(ARM_INST_ADD, rd, rn, imm) +++#define ARM_ADDS_I(rd, rn, imm) _AL3_I(ARM_INST_ADDS, rd, rn, imm) +++#define ARM_ADC_R(rd, rn, rm) _AL3_R(ARM_INST_ADC, rd, rn, rm) +++#define ARM_ADC_I(rd, rn, imm) _AL3_I(ARM_INST_ADC, rd, rn, imm) ++ ++ #define ARM_AND_R(rd, rn, rm) _AL3_R(ARM_INST_AND, rd, rn, rm) +++#define ARM_ANDS_R(rd, rn, rm) _AL3_R(ARM_INST_ANDS, rd, rn, rm) ++ #define ARM_AND_I(rd, rn, imm) _AL3_I(ARM_INST_AND, rd, rn, imm) ++ ++ #define ARM_BIC_R(rd, rn, rm) _AL3_R(ARM_INST_BIC, rd, rn, rm) ++@@ -155,27 +186,38 @@ ++ #define ARM_EOR_R(rd, rn, rm) _AL3_R(ARM_INST_EOR, rd, rn, rm) ++ #define ARM_EOR_I(rd, rn, imm) _AL3_I(ARM_INST_EOR, rd, rn, imm) ++ ++-#define ARM_LDR_I(rt, rn, off) (ARM_INST_LDR_I | (rt) << 12 | (rn) << 16 \ ++- | (off)) ++-#define ARM_LDRB_I(rt, rn, off) (ARM_INST_LDRB_I | (rt) << 12 | (rn) << 16 \ ++- | (off)) ++-#define ARM_LDRB_R(rt, rn, rm) (ARM_INST_LDRB_R | (rt) << 12 | (rn) << 16 \ +++#define ARM_LDR_R(rt, rn, rm) (ARM_INST_LDR_R | ARM_INST_LDST__U \ +++ | (rt) << 12 | (rn) << 16 \ ++ | (rm)) ++-#define ARM_LDRH_I(rt, rn, off) (ARM_INST_LDRH_I | (rt) << 12 | (rn) << 16 \ ++- | (((off) & 0xf0) << 4) | ((off) & 0xf)) ++-#define ARM_LDRH_R(rt, rn, rm) (ARM_INST_LDRH_R | (rt) << 12 | (rn) << 16 \ +++#define ARM_LDR_R_SI(rt, rn, rm, type, imm) \ +++ (ARM_INST_LDR_R | ARM_INST_LDST__U \ +++ | (rt) << 12 | (rn) << 16 \ +++ | 
(imm) << 7 | (type) << 5 | (rm)) +++#define ARM_LDRB_R(rt, rn, rm) (ARM_INST_LDRB_R | ARM_INST_LDST__U \ +++ | (rt) << 12 | (rn) << 16 \ +++ | (rm)) +++#define ARM_LDRH_R(rt, rn, rm) (ARM_INST_LDRH_R | ARM_INST_LDST__U \ +++ | (rt) << 12 | (rn) << 16 \ ++ | (rm)) ++ ++ #define ARM_LDM(rn, regs) (ARM_INST_LDM | (rn) << 16 | (regs)) +++#define ARM_LDM_IA(rn, regs) (ARM_INST_LDM_IA | (rn) << 16 | (regs)) ++ ++ #define ARM_LSL_R(rd, rn, rm) (_AL3_R(ARM_INST_LSL, rd, 0, rn) | (rm) << 8) ++ #define ARM_LSL_I(rd, rn, imm) (_AL3_I(ARM_INST_LSL, rd, 0, rn) | (imm) << 7) ++ ++ #define ARM_LSR_R(rd, rn, rm) (_AL3_R(ARM_INST_LSR, rd, 0, rn) | (rm) << 8) ++ #define ARM_LSR_I(rd, rn, imm) (_AL3_I(ARM_INST_LSR, rd, 0, rn) | (imm) << 7) +++#define ARM_ASR_R(rd, rn, rm) (_AL3_R(ARM_INST_ASR, rd, 0, rn) | (rm) << 8) +++#define ARM_ASR_I(rd, rn, imm) (_AL3_I(ARM_INST_ASR, rd, 0, rn) | (imm) << 7) ++ ++ #define ARM_MOV_R(rd, rm) _AL3_R(ARM_INST_MOV, rd, 0, rm) +++#define ARM_MOVS_R(rd, rm) _AL3_R(ARM_INST_MOVS, rd, 0, rm) ++ #define ARM_MOV_I(rd, imm) _AL3_I(ARM_INST_MOV, rd, 0, imm) +++#define ARM_MOV_SR(rd, rm, type, rs) \ +++ (_AL3_SR(ARM_MOV_R(rd, rm)) | (type) << 5 | (rs) << 8) +++#define ARM_MOV_SI(rd, rm, type, imm6) \ +++ (ARM_MOV_R(rd, rm) | (type) << 5 | (imm6) << 7) ++ ++ #define ARM_MOVW(rd, imm) \ ++ (ARM_INST_MOVW | ((imm) >> 12) << 16 | (rd) << 12 | ((imm) & 0x0fff)) ++@@ -190,19 +232,31 @@ ++ ++ #define ARM_ORR_R(rd, rn, rm) _AL3_R(ARM_INST_ORR, rd, rn, rm) ++ #define ARM_ORR_I(rd, rn, imm) _AL3_I(ARM_INST_ORR, rd, rn, imm) ++-#define ARM_ORR_S(rd, rn, rm, type, rs) \ ++- (ARM_ORR_R(rd, rn, rm) | (type) << 5 | (rs) << 7) +++#define ARM_ORR_SR(rd, rn, rm, type, rs) \ +++ (_AL3_SR(ARM_ORR_R(rd, rn, rm)) | (type) << 5 | (rs) << 8) +++#define ARM_ORRS_R(rd, rn, rm) _AL3_R(ARM_INST_ORRS, rd, rn, rm) +++#define ARM_ORRS_SR(rd, rn, rm, type, rs) \ +++ (_AL3_SR(ARM_ORRS_R(rd, rn, rm)) | (type) << 5 | (rs) << 8) +++#define ARM_ORR_SI(rd, rn, rm, type, imm6) \ +++ (ARM_ORR_R(rd, rn, rm) | (type) << 5 | (imm6) << 7) +++#define ARM_ORRS_SI(rd, rn, rm, type, imm6) \ +++ (ARM_ORRS_R(rd, rn, rm) | (type) << 5 | (imm6) << 7) ++ ++ #define ARM_REV(rd, rm) (ARM_INST_REV | (rd) << 12 | (rm)) ++ #define ARM_REV16(rd, rm) (ARM_INST_REV16 | (rd) << 12 | (rm)) ++ ++ #define ARM_RSB_I(rd, rn, imm) _AL3_I(ARM_INST_RSB, rd, rn, imm) +++#define ARM_RSBS_I(rd, rn, imm) _AL3_I(ARM_INST_RSBS, rd, rn, imm) +++#define ARM_RSC_I(rd, rn, imm) _AL3_I(ARM_INST_RSC, rd, rn, imm) ++ ++ #define ARM_SUB_R(rd, rn, rm) _AL3_R(ARM_INST_SUB, rd, rn, rm) +++#define ARM_SUBS_R(rd, rn, rm) _AL3_R(ARM_INST_SUBS, rd, rn, rm) +++#define ARM_RSB_R(rd, rn, rm) _AL3_R(ARM_INST_RSB, rd, rn, rm) +++#define ARM_SBC_R(rd, rn, rm) _AL3_R(ARM_INST_SBC, rd, rn, rm) +++#define ARM_SBCS_R(rd, rn, rm) _AL3_R(ARM_INST_SBCS, rd, rn, rm) ++ #define ARM_SUB_I(rd, rn, imm) _AL3_I(ARM_INST_SUB, rd, rn, imm) ++- ++-#define ARM_STR_I(rt, rn, off) (ARM_INST_STR_I | (rt) << 12 | (rn) << 16 \ ++- | (off)) +++#define ARM_SUBS_I(rd, rn, imm) _AL3_I(ARM_INST_SUBS, rd, rn, imm) +++#define ARM_SBC_I(rd, rn, imm) _AL3_I(ARM_INST_SBC, rd, rn, imm) ++ ++ #define ARM_TST_R(rn, rm) _AL3_R(ARM_INST_TST, 0, rn, rm) ++ #define ARM_TST_I(rn, imm) _AL3_I(ARM_INST_TST, 0, rn, imm) ++@@ -214,5 +268,6 @@ ++ ++ #define ARM_MLS(rd, rn, rm, ra) (ARM_INST_MLS | (rd) << 16 | (rn) | (rm) << 8 \ ++ | (ra) << 12) +++#define ARM_UXTH(rd, rm) (ARM_INST_UXTH | (rd) << 12 | (rm)) ++ ++ #endif /* PFILTER_OPCODES_ARM_H */ ++--- a/arch/arm/net/Makefile +++++ b/arch/arm/net/Makefile ++@@ -1,3 +1,4 
@@ +++# SPDX-License-Identifier: GPL-2.0-only ++ # ARM-specific networking code ++ ++ obj-$(CONFIG_BPF_JIT) += bpf_jit_32.o ++--- /dev/null +++++ b/include/linux/bpf-cgroup.h ++@@ -0,0 +1,410 @@ +++/* SPDX-License-Identifier: GPL-2.0 */ +++#ifndef _BPF_CGROUP_H +++#define _BPF_CGROUP_H +++ +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++ +++struct sock; +++struct sockaddr; +++struct cgroup; +++struct sk_buff; +++struct bpf_map; +++struct bpf_prog; +++struct bpf_sock_ops_kern; +++struct bpf_cgroup_storage; +++struct ctl_table; +++struct ctl_table_header; +++ +++#ifdef CONFIG_CGROUP_BPF +++ +++extern struct static_key_false cgroup_bpf_enabled_key; +++#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key) +++ +++DECLARE_PER_CPU(struct bpf_cgroup_storage*, +++ bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); +++ +++#define for_each_cgroup_storage_type(stype) \ +++ for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) +++ +++struct bpf_cgroup_storage_map; +++ +++struct bpf_storage_buffer { +++ struct rcu_head rcu; +++ char data[0]; +++}; +++ +++struct bpf_cgroup_storage { +++ union { +++ struct bpf_storage_buffer *buf; +++ void __percpu *percpu_buf; +++ }; +++ struct bpf_cgroup_storage_map *map; +++ struct bpf_cgroup_storage_key key; +++ struct list_head list; +++ struct rb_node node; +++ struct rcu_head rcu; +++}; +++ +++struct bpf_prog_list { +++ struct list_head node; +++ struct bpf_prog *prog; +++ struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; +++}; +++ +++struct bpf_prog_array; +++ +++struct cgroup_bpf { +++ /* array of effective progs in this cgroup */ +++ struct bpf_prog_array __rcu *effective[MAX_BPF_ATTACH_TYPE]; +++ +++ /* attached progs to this cgroup and attach flags +++ * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will +++ * have either zero or one element +++ * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS +++ */ +++ struct list_head progs[MAX_BPF_ATTACH_TYPE]; +++ u32 flags[MAX_BPF_ATTACH_TYPE]; +++ +++ /* temp storage for effective prog array used by prog_attach/detach */ +++ struct bpf_prog_array *inactive; +++ +++ /* reference counter used to detach bpf programs after cgroup removal */ +++ struct percpu_ref refcnt; +++ +++ /* cgroup_bpf is released using a work queue */ +++ struct work_struct release_work; +++}; +++ +++int cgroup_bpf_inherit(struct cgroup *cgrp); +++void cgroup_bpf_offline(struct cgroup *cgrp); +++ +++int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, +++ enum bpf_attach_type type, u32 flags); +++int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, +++ enum bpf_attach_type type); +++int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, +++ union bpf_attr __user *uattr); +++ +++/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */ +++int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, +++ enum bpf_attach_type type, u32 flags); +++int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, +++ enum bpf_attach_type type, u32 flags); +++int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, +++ union bpf_attr __user *uattr); +++ +++int __cgroup_bpf_run_filter_skb(struct sock *sk, +++ struct sk_buff *skb, +++ enum bpf_attach_type type); +++ +++int __cgroup_bpf_run_filter_sk(struct sock *sk, +++ enum bpf_attach_type type); +++ +++int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, +++ struct sockaddr *uaddr, +++ enum bpf_attach_type type, +++ void 
*t_ctx); +++ +++int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, +++ struct bpf_sock_ops_kern *sock_ops, +++ enum bpf_attach_type type); +++ +++int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, +++ short access, enum bpf_attach_type type); +++ +++int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, +++ struct ctl_table *table, int write, +++ void __user *buf, size_t *pcount, +++ loff_t *ppos, void **new_buf, +++ enum bpf_attach_type type); +++ +++int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level, +++ int *optname, char __user *optval, +++ int *optlen, char **kernel_optval); +++int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, +++ int optname, char __user *optval, +++ int __user *optlen, int max_optlen, +++ int retval); +++ +++static inline enum bpf_cgroup_storage_type cgroup_storage_type( +++ struct bpf_map *map) +++{ +++ if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) +++ return BPF_CGROUP_STORAGE_PERCPU; +++ +++ return BPF_CGROUP_STORAGE_SHARED; +++} +++ +++static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage +++ *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) +++{ +++ enum bpf_cgroup_storage_type stype; +++ +++ for_each_cgroup_storage_type(stype) +++ this_cpu_write(bpf_cgroup_storage[stype], storage[stype]); +++} +++ +++struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog, +++ enum bpf_cgroup_storage_type stype); +++void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage); +++void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage, +++ struct cgroup *cgroup, +++ enum bpf_attach_type type); +++void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage); +++int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map); +++void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map); +++ +++int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value); +++int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, +++ void *value, u64 flags); +++ +++/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. 
*/ +++#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ +++({ \ +++ int __ret = 0; \ +++ if (cgroup_bpf_enabled) \ +++ __ret = __cgroup_bpf_run_filter_skb(sk, skb, \ +++ BPF_CGROUP_INET_INGRESS); \ +++ \ +++ __ret; \ +++}) +++ +++#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) \ +++({ \ +++ int __ret = 0; \ +++ if (cgroup_bpf_enabled && sk && sk == skb->sk) { \ +++ typeof(sk) __sk = sk_to_full_sk(sk); \ +++ if (sk_fullsock(__sk)) \ +++ __ret = __cgroup_bpf_run_filter_skb(__sk, skb, \ +++ BPF_CGROUP_INET_EGRESS); \ +++ } \ +++ __ret; \ +++}) +++ +++#define BPF_CGROUP_RUN_SK_PROG(sk, type) \ +++({ \ +++ int __ret = 0; \ +++ if (cgroup_bpf_enabled) { \ +++ __ret = __cgroup_bpf_run_filter_sk(sk, type); \ +++ } \ +++ __ret; \ +++}) +++ +++#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \ +++ BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE) +++ +++#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) \ +++ BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND) +++ +++#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \ +++ BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET6_POST_BIND) +++ +++#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \ +++({ \ +++ int __ret = 0; \ +++ if (cgroup_bpf_enabled) \ +++ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \ +++ NULL); \ +++ __ret; \ +++}) +++ +++#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) \ +++({ \ +++ int __ret = 0; \ +++ if (cgroup_bpf_enabled) { \ +++ lock_sock(sk); \ +++ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \ +++ t_ctx); \ +++ release_sock(sk); \ +++ } \ +++ __ret; \ +++}) +++ +++#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) \ +++ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_BIND) +++ +++#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) \ +++ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_BIND) +++ +++#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \ +++ sk->sk_prot->pre_connect) +++ +++#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \ +++ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_CONNECT) +++ +++#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \ +++ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT) +++ +++#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \ +++ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT, NULL) +++ +++#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \ +++ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT, NULL) +++ +++#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \ +++ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_SENDMSG, t_ctx) +++ +++#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \ +++ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_SENDMSG, t_ctx) +++ +++#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \ +++ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_RECVMSG, NULL) +++ +++#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \ +++ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_RECVMSG, NULL) +++ +++#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \ +++({ \ +++ int __ret = 0; \ +++ if (cgroup_bpf_enabled && (sock_ops)->sk) { \ +++ typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk); \ +++ if (__sk && sk_fullsock(__sk)) \ +++ __ret = __cgroup_bpf_run_filter_sock_ops(__sk, \ +++ sock_ops, \ +++ BPF_CGROUP_SOCK_OPS); \ +++ } \ +++ __ret; \ +++}) +++ +++#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access) \ +++({ \ +++ int __ret = 0; \ +++ if (cgroup_bpf_enabled) \ +++ __ret = 
__cgroup_bpf_check_dev_permission(type, major, minor, \ +++ access, \ +++ BPF_CGROUP_DEVICE); \ +++ \ +++ __ret; \ +++}) +++ +++ +++#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos, nbuf) \ +++({ \ +++ int __ret = 0; \ +++ if (cgroup_bpf_enabled) \ +++ __ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \ +++ buf, count, pos, nbuf, \ +++ BPF_CGROUP_SYSCTL); \ +++ __ret; \ +++}) +++ +++#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ +++ kernel_optval) \ +++({ \ +++ int __ret = 0; \ +++ if (cgroup_bpf_enabled) \ +++ __ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \ +++ optname, optval, \ +++ optlen, \ +++ kernel_optval); \ +++ __ret; \ +++}) +++ +++#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \ +++({ \ +++ int __ret = 0; \ +++ if (cgroup_bpf_enabled) \ +++ get_user(__ret, optlen); \ +++ __ret; \ +++}) +++ +++#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, optlen, \ +++ max_optlen, retval) \ +++({ \ +++ int __ret = retval; \ +++ if (cgroup_bpf_enabled) \ +++ __ret = __cgroup_bpf_run_filter_getsockopt(sock, level, \ +++ optname, optval, \ +++ optlen, max_optlen, \ +++ retval); \ +++ __ret; \ +++}) +++ +++int cgroup_bpf_prog_attach(const union bpf_attr *attr, +++ enum bpf_prog_type ptype, struct bpf_prog *prog); +++int cgroup_bpf_prog_detach(const union bpf_attr *attr, +++ enum bpf_prog_type ptype); +++int cgroup_bpf_prog_query(const union bpf_attr *attr, +++ union bpf_attr __user *uattr); +++#else +++ +++struct bpf_prog; +++struct cgroup_bpf {}; +++static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } +++static inline void cgroup_bpf_offline(struct cgroup *cgrp) {} +++ +++static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr, +++ enum bpf_prog_type ptype, +++ struct bpf_prog *prog) +++{ +++ return -EINVAL; +++} +++ +++static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr, +++ enum bpf_prog_type ptype) +++{ +++ return -EINVAL; +++} +++ +++static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, +++ union bpf_attr __user *uattr) +++{ +++ return -EINVAL; +++} +++ +++static inline void bpf_cgroup_storage_set( +++ struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {} +++static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog, +++ struct bpf_map *map) { return 0; } +++static inline void bpf_cgroup_storage_release(struct bpf_prog *prog, +++ struct bpf_map *map) {} +++static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( +++ struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return NULL; } +++static inline void bpf_cgroup_storage_free( +++ struct bpf_cgroup_storage *storage) {} +++static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, +++ void *value) { +++ return 0; +++} +++static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, +++ void *key, void *value, u64 flags) { +++ return 0; +++} +++ +++#define cgroup_bpf_enabled (0) +++#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) +++#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) +++#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) +++#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) +++#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; }) +++#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; }) +++#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; }) +++#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; }) +++#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; }) +++#define 
BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; }) +++#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; }) +++#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; }) +++#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) +++#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; }) +++#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; }) +++#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; }) +++#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) +++#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) +++#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos,nbuf) ({ 0; }) +++#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; }) +++#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \ +++ optlen, max_optlen, retval) ({ retval; }) +++#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ +++ kernel_optval) ({ 0; }) +++ +++#define for_each_cgroup_storage_type(stype) for (; false; ) +++ +++#endif /* CONFIG_CGROUP_BPF */ +++ +++#endif /* _BPF_CGROUP_H */ ++--- a/include/linux/bpf.h +++++ b/include/linux/bpf.h ++@@ -1,55 +1,183 @@ +++/* SPDX-License-Identifier: GPL-2.0-only */ ++ /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com ++- * ++- * This program is free software; you can redistribute it and/or ++- * modify it under the terms of version 2 of the GNU General Public ++- * License as published by the Free Software Foundation. ++ */ ++ #ifndef _LINUX_BPF_H ++ #define _LINUX_BPF_H 1 ++ ++ #include +++ ++ #include ++ #include +++#include +++#include +++#include +++#include +++#include +++#include ++ +++struct bpf_verifier_env; +++struct perf_event; +++struct bpf_prog; ++ struct bpf_map; +++struct sock; +++struct seq_file; +++struct btf; +++struct btf_type; +++ +++extern struct idr btf_idr; +++extern spinlock_t btf_idr_lock; ++ ++ /* map is generic key/value storage optionally accesible by eBPF programs */ ++ struct bpf_map_ops { ++ /* funcs callable from userspace (via syscall) */ +++ int (*map_alloc_check)(union bpf_attr *attr); ++ struct bpf_map *(*map_alloc)(union bpf_attr *attr); ++- void (*map_free)(struct bpf_map *); +++ void (*map_release)(struct bpf_map *map, struct file *map_file); +++ void (*map_free)(struct bpf_map *map); ++ int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key); +++ void (*map_release_uref)(struct bpf_map *map); +++ void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key); ++ ++ /* funcs callable from userspace and from eBPF programs */ ++ void *(*map_lookup_elem)(struct bpf_map *map, void *key); ++ int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); ++ int (*map_delete_elem)(struct bpf_map *map, void *key); +++ int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); +++ int (*map_pop_elem)(struct bpf_map *map, void *value); +++ int (*map_peek_elem)(struct bpf_map *map, void *value); ++ ++ /* funcs called by prog_array and perf_event_array map */ ++- void *(*map_fd_get_ptr) (struct bpf_map *map, int fd); ++- void (*map_fd_put_ptr) (void *ptr); +++ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, +++ int fd); +++ void (*map_fd_put_ptr)(void *ptr); +++ u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); +++ u32 (*map_fd_sys_lookup_elem)(void *ptr); +++ void (*map_seq_show_elem)(struct bpf_map *map, void *key, +++ struct seq_file *m); +++ int (*map_check_btf)(const struct bpf_map *map, +++ 
const struct btf *btf, +++ const struct btf_type *key_type, +++ const struct btf_type *value_type); +++ +++ /* Direct value access helpers. */ +++ int (*map_direct_value_addr)(const struct bpf_map *map, +++ u64 *imm, u32 off); +++ int (*map_direct_value_meta)(const struct bpf_map *map, +++ u64 imm, u32 *off); +++}; +++ +++struct bpf_map_memory { +++ u32 pages; +++ struct user_struct *user; ++ }; ++ ++ struct bpf_map { ++- atomic_t refcnt; +++ /* The first two cachelines with read-mostly members of which some +++ * are also accessed in fast-path (e.g. ops, max_entries). +++ */ +++ const struct bpf_map_ops *ops ____cacheline_aligned; +++ struct bpf_map *inner_map_meta; +++#ifdef CONFIG_SECURITY +++ void *security; +++#endif ++ enum bpf_map_type map_type; ++ u32 key_size; ++ u32 value_size; ++ u32 max_entries; ++- u32 pages; +++ u32 map_flags; +++ int spin_lock_off; /* >=0 valid offset, <0 error */ +++ u32 id; +++ int numa_node; +++ u32 btf_key_type_id; +++ u32 btf_value_type_id; +++ struct btf *btf; +++ struct bpf_map_memory memory; ++ bool unpriv_array; ++- struct user_struct *user; ++- const struct bpf_map_ops *ops; ++- struct work_struct work; +++ bool frozen; /* write-once */ +++ /* 48 bytes hole */ +++ +++ /* The 3rd and 4th cacheline with misc members to avoid false sharing +++ * particularly with refcounting. +++ */ +++ atomic_t refcnt ____cacheline_aligned; ++ atomic_t usercnt; +++ struct work_struct work; +++ char name[BPF_OBJ_NAME_LEN]; ++ }; ++ ++-struct bpf_map_type_list { ++- struct list_head list_node; ++- const struct bpf_map_ops *ops; ++- enum bpf_map_type type; +++static inline bool map_value_has_spin_lock(const struct bpf_map *map) +++{ +++ return map->spin_lock_off >= 0; +++} +++ +++static inline void check_and_init_map_lock(struct bpf_map *map, void *dst) +++{ +++ if (likely(!map_value_has_spin_lock(map))) +++ return; +++ *(struct bpf_spin_lock *)(dst + map->spin_lock_off) = +++ (struct bpf_spin_lock){}; +++} +++ +++/* copy everything but bpf_spin_lock */ +++static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) +++{ +++ if (unlikely(map_value_has_spin_lock(map))) { +++ u32 off = map->spin_lock_off; +++ +++ memcpy(dst, src, off); +++ memcpy(dst + off + sizeof(struct bpf_spin_lock), +++ src + off + sizeof(struct bpf_spin_lock), +++ map->value_size - off - sizeof(struct bpf_spin_lock)); +++ } else { +++ memcpy(dst, src, map->value_size); +++ } +++} +++void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, +++ bool lock_src); +++ +++struct bpf_offload_dev; +++struct bpf_offloaded_map; +++ +++struct bpf_map_dev_ops { +++ int (*map_get_next_key)(struct bpf_offloaded_map *map, +++ void *key, void *next_key); +++ int (*map_lookup_elem)(struct bpf_offloaded_map *map, +++ void *key, void *value); +++ int (*map_update_elem)(struct bpf_offloaded_map *map, +++ void *key, void *value, u64 flags); +++ int (*map_delete_elem)(struct bpf_offloaded_map *map, void *key); ++ }; ++ +++struct bpf_offloaded_map { +++ struct bpf_map map; +++ struct net_device *netdev; +++ const struct bpf_map_dev_ops *dev_ops; +++ void *dev_priv; +++ struct list_head offloads; +++}; +++ +++static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map) +++{ +++ return container_of(map, struct bpf_offloaded_map, map); +++} +++ +++static inline bool bpf_map_offload_neutral(const struct bpf_map *map) +++{ +++ return map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; +++} +++ +++static inline bool bpf_map_support_seq_show(const struct bpf_map *map) +++{ +++ return 
map->btf && map->ops->map_seq_show_elem; +++} +++ +++int map_check_no_btf(const struct bpf_map *map, +++ const struct btf *btf, +++ const struct btf_type *key_type, +++ const struct btf_type *value_type); +++ +++extern const struct bpf_map_ops bpf_map_offload_ops; +++ ++ /* function argument constraints */ ++ enum bpf_arg_type { ++ ARG_DONTCARE = 0, /* unused argument in helper function */ ++@@ -60,22 +188,40 @@ enum bpf_arg_type { ++ ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */ ++ ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ ++ ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ +++ ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */ +++ ARG_PTR_TO_MAP_VALUE_OR_NULL, /* pointer to stack used as map value or NULL */ ++ ++ /* the following constraints used to prototype bpf_memcmp() and other ++ * functions that access data on eBPF program stack ++ */ ++- ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */ ++- ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */ +++ ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ +++ ARG_PTR_TO_MEM_OR_NULL, /* pointer to valid memory or NULL */ +++ ARG_PTR_TO_UNINIT_MEM, /* pointer to memory does not need to be initialized, +++ * helper function must fill all bytes or clear +++ * them in error case. +++ */ +++ +++ ARG_CONST_SIZE, /* number of bytes accessed from memory */ +++ ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */ ++ ++ ARG_PTR_TO_CTX, /* pointer to context */ ++ ARG_ANYTHING, /* any (initialized) argument is ok */ +++ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ +++ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ +++ ARG_PTR_TO_INT, /* pointer to int */ +++ ARG_PTR_TO_LONG, /* pointer to long */ +++ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ ++ }; ++ ++ /* type of values returned from helper functions */ ++ enum bpf_return_type { ++ RET_INTEGER, /* function returns integer */ ++ RET_VOID, /* function doesn't return anything */ +++ RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */ ++ RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ +++ RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */ +++ RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */ +++ RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ ++ }; ++ ++ /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs ++@@ -85,6 +231,7 @@ enum bpf_return_type { ++ struct bpf_func_proto { ++ u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); ++ bool gpl_only; +++ bool pkt_access; ++ enum bpf_return_type ret_type; ++ enum bpf_arg_type arg1_type; ++ enum bpf_arg_type arg2_type; ++@@ -104,35 +251,172 @@ enum bpf_access_type { ++ BPF_WRITE = 2 ++ }; ++ ++-struct bpf_prog; +++/* types of values stored in eBPF registers */ +++/* Pointer types represent: +++ * pointer +++ * pointer + imm +++ * pointer + (u16) var +++ * pointer + (u16) var + imm +++ * if (range > 0) then [ptr, ptr + range - off) is safe to access +++ * if (id > 0) means that some 'var' was added +++ * if (off > 0) means that 'imm' was added +++ */ +++enum bpf_reg_type { +++ NOT_INIT = 0, /* nothing was written into register */ +++ SCALAR_VALUE, /* reg doesn't contain a valid pointer */ +++ PTR_TO_CTX, /* reg points to bpf_context */ +++ CONST_PTR_TO_MAP, /* reg points to struct bpf_map */ +++ PTR_TO_MAP_VALUE, /* reg points to map 
element value */ +++ PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */ +++ PTR_TO_STACK, /* reg == frame_pointer + offset */ +++ PTR_TO_PACKET_META, /* skb->data - meta_len */ +++ PTR_TO_PACKET, /* reg points to skb->data */ +++ PTR_TO_PACKET_END, /* skb->data + headlen */ +++ PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */ +++ PTR_TO_SOCKET, /* reg points to struct bpf_sock */ +++ PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */ +++ PTR_TO_SOCK_COMMON, /* reg points to sock_common */ +++ PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ +++ PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ +++ PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ +++ PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ +++ PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */ +++}; +++ +++/* The information passed from prog-specific *_is_valid_access +++ * back to the verifier. +++ */ +++struct bpf_insn_access_aux { +++ enum bpf_reg_type reg_type; +++ int ctx_field_size; +++}; +++ +++static inline void +++bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) +++{ +++ aux->ctx_field_size = size; +++} +++ +++struct bpf_prog_ops { +++ int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, +++ union bpf_attr __user *uattr); +++}; ++ ++ struct bpf_verifier_ops { ++ /* return eBPF function prototype for verification */ ++- const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id); +++ const struct bpf_func_proto * +++ (*get_func_proto)(enum bpf_func_id func_id, +++ const struct bpf_prog *prog); ++ ++ /* return true if 'size' wide access at offset 'off' within bpf_context ++ * with 'type' (read or write) is allowed ++ */ ++- bool (*is_valid_access)(int off, int size, enum bpf_access_type type); +++ bool (*is_valid_access)(int off, int size, enum bpf_access_type type, +++ const struct bpf_prog *prog, +++ struct bpf_insn_access_aux *info); +++ int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, +++ const struct bpf_prog *prog); +++ int (*gen_ld_abs)(const struct bpf_insn *orig, +++ struct bpf_insn *insn_buf); +++ u32 (*convert_ctx_access)(enum bpf_access_type type, +++ const struct bpf_insn *src, +++ struct bpf_insn *dst, +++ struct bpf_prog *prog, u32 *target_size); +++}; ++ ++- u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg, ++- int src_reg, int ctx_off, ++- struct bpf_insn *insn, struct bpf_prog *prog); +++struct bpf_prog_offload_ops { +++ /* verifier basic callbacks */ +++ int (*insn_hook)(struct bpf_verifier_env *env, +++ int insn_idx, int prev_insn_idx); +++ int (*finalize)(struct bpf_verifier_env *env); +++ /* verifier optimization callbacks (called after .finalize) */ +++ int (*replace_insn)(struct bpf_verifier_env *env, u32 off, +++ struct bpf_insn *insn); +++ int (*remove_insns)(struct bpf_verifier_env *env, u32 off, u32 cnt); +++ /* program management callbacks */ +++ int (*prepare)(struct bpf_prog *prog); +++ int (*translate)(struct bpf_prog *prog); +++ void (*destroy)(struct bpf_prog *prog); ++ }; ++ ++-struct bpf_prog_type_list { ++- struct list_head list_node; ++- const struct bpf_verifier_ops *ops; ++- enum bpf_prog_type type; +++struct bpf_prog_offload { +++ struct bpf_prog *prog; +++ struct net_device *netdev; +++ struct bpf_offload_dev *offdev; +++ void *dev_priv; +++ struct list_head offloads; +++ bool dev_state; +++ bool opt_failed; +++ void *jited_image; +++ u32 jited_len; +++}; +++ +++enum bpf_cgroup_storage_type { +++ 
BPF_CGROUP_STORAGE_SHARED, +++ BPF_CGROUP_STORAGE_PERCPU, +++ __BPF_CGROUP_STORAGE_MAX +++}; +++ +++#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX +++ +++struct bpf_prog_stats { +++ u64 cnt; +++ u64 nsecs; +++ struct u64_stats_sync syncp; ++ }; ++ ++ struct bpf_prog_aux { ++ atomic_t refcnt; ++ u32 used_map_cnt; ++- const struct bpf_verifier_ops *ops; +++ u32 max_ctx_offset; +++ u32 max_pkt_offset; +++ u32 max_tp_access; +++ u32 stack_depth; +++ u32 id; +++ u32 func_cnt; /* used by non-func prog as the number of func progs */ +++ u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ +++ bool verifier_zext; /* Zero extensions has been inserted by verifier. */ +++ bool offload_requested; +++ struct bpf_prog **func; +++ void *jit_data; /* JIT specific data. arch dependent */ +++ struct latch_tree_node ksym_tnode; +++ struct list_head ksym_lnode; +++ const struct bpf_prog_ops *ops; ++ struct bpf_map **used_maps; ++ struct bpf_prog *prog; ++ struct user_struct *user; +++ u64 load_time; /* ns since boottime */ +++ struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; +++ char name[BPF_OBJ_NAME_LEN]; +++#ifdef CONFIG_SECURITY +++ void *security; +++#endif +++ struct bpf_prog_offload *offload; +++ struct btf *btf; +++ struct bpf_func_info *func_info; +++ /* bpf_line_info loaded from userspace. linfo->insn_off +++ * has the xlated insn offset. +++ * Both the main and sub prog share the same linfo. +++ * The subprog can access its first linfo by +++ * using the linfo_idx. +++ */ +++ struct bpf_line_info *linfo; +++ /* jited_linfo is the jited addr of the linfo. It has a +++ * one to one mapping to linfo: +++ * jited_linfo[i] is the jited addr for the linfo[i]->insn_off. +++ * Both the main and sub prog share the same jited_linfo. +++ * The subprog can access its first jited_linfo by +++ * using the linfo_idx. +++ */ +++ void **jited_linfo; +++ u32 func_info_cnt; +++ u32 nr_linfo; +++ /* subprog can use linfo_idx to access its first linfo and +++ * jited_linfo. +++ * main prog always has linfo_idx == 0 +++ */ +++ u32 linfo_idx; +++ struct bpf_prog_stats __percpu *stats; ++ union { ++ struct work_struct work; ++ struct rcu_head rcu; ++@@ -153,76 +437,688 @@ struct bpf_array { ++ union { ++ char value[0] __aligned(8); ++ void *ptrs[0] __aligned(8); +++ void __percpu *pptrs[0] __aligned(8); ++ }; ++ }; +++ +++#define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */ ++ #define MAX_TAIL_CALL_CNT 32 ++ ++-u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); ++-void bpf_fd_array_map_clear(struct bpf_map *map); +++#define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \ +++ BPF_F_RDONLY_PROG | \ +++ BPF_F_WRONLY | \ +++ BPF_F_WRONLY_PROG) +++ +++#define BPF_MAP_CAN_READ BIT(0) +++#define BPF_MAP_CAN_WRITE BIT(1) +++ +++static inline u32 bpf_map_flags_to_cap(struct bpf_map *map) +++{ +++ u32 access_flags = map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG); +++ +++ /* Combination of BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG is +++ * not possible. 
+++ */ +++ if (access_flags & BPF_F_RDONLY_PROG) +++ return BPF_MAP_CAN_READ; +++ else if (access_flags & BPF_F_WRONLY_PROG) +++ return BPF_MAP_CAN_WRITE; +++ else +++ return BPF_MAP_CAN_READ | BPF_MAP_CAN_WRITE; +++} +++ +++static inline bool bpf_map_flags_access_ok(u32 access_flags) +++{ +++ return (access_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) != +++ (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG); +++} +++ +++struct bpf_event_entry { +++ struct perf_event *event; +++ struct file *perf_file; +++ struct file *map_file; +++ struct rcu_head rcu; +++}; +++ ++ bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); +++int bpf_prog_calc_tag(struct bpf_prog *fp); +++ ++ const struct bpf_func_proto *bpf_get_trace_printk_proto(void); ++ +++typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, +++ unsigned long off, unsigned long len); +++typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type, +++ const struct bpf_insn *src, +++ struct bpf_insn *dst, +++ struct bpf_prog *prog, +++ u32 *target_size); +++ +++u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, +++ void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy); +++ +++/* an array of programs to be executed under rcu_lock. +++ * +++ * Typical usage: +++ * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, BPF_PROG_RUN); +++ * +++ * the structure returned by bpf_prog_array_alloc() should be populated +++ * with program pointers and the last pointer must be NULL. +++ * The user has to keep refcnt on the program and make sure the program +++ * is removed from the array before bpf_prog_put(). +++ * The 'struct bpf_prog_array *' should only be replaced with xchg() +++ * since other cpus are walking the array of pointers in parallel. +++ */ +++struct bpf_prog_array_item { +++ struct bpf_prog *prog; +++ struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; +++}; +++ +++struct bpf_prog_array { +++ struct rcu_head rcu; +++ struct bpf_prog_array_item items[0]; +++}; +++ +++struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); +++void bpf_prog_array_free(struct bpf_prog_array *progs); +++int bpf_prog_array_length(struct bpf_prog_array *progs); +++bool bpf_prog_array_is_empty(struct bpf_prog_array *array); +++int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs, +++ __u32 __user *prog_ids, u32 cnt); +++ +++void bpf_prog_array_delete_safe(struct bpf_prog_array *progs, +++ struct bpf_prog *old_prog); +++int bpf_prog_array_copy_info(struct bpf_prog_array *array, +++ u32 *prog_ids, u32 request_cnt, +++ u32 *prog_cnt); +++int bpf_prog_array_copy(struct bpf_prog_array *old_array, +++ struct bpf_prog *exclude_prog, +++ struct bpf_prog *include_prog, +++ struct bpf_prog_array **new_array); +++ +++#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null, set_cg_storage) \ +++ ({ \ +++ struct bpf_prog_array_item *_item; \ +++ struct bpf_prog *_prog; \ +++ struct bpf_prog_array *_array; \ +++ u32 _ret = 1; \ +++ preempt_disable(); \ +++ rcu_read_lock(); \ +++ _array = rcu_dereference(array); \ +++ if (unlikely(check_non_null && !_array))\ +++ goto _out; \ +++ _item = &_array->items[0]; \ +++ while ((_prog = READ_ONCE(_item->prog))) { \ +++ if (set_cg_storage) \ +++ bpf_cgroup_storage_set(_item->cgroup_storage); \ +++ _ret &= func(_prog, ctx); \ +++ _item++; \ +++ } \ +++_out: \ +++ rcu_read_unlock(); \ +++ preempt_enable(); \ +++ _ret; \ +++ }) +++ +++/* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs +++ * so BPF programs can 
request cwr for TCP packets. +++ * +++ * Current cgroup skb programs can only return 0 or 1 (0 to drop the +++ * packet. This macro changes the behavior so the low order bit +++ * indicates whether the packet should be dropped (0) or not (1) +++ * and the next bit is a congestion notification bit. This could be +++ * used by TCP to call tcp_enter_cwr() +++ * +++ * Hence, new allowed return values of CGROUP EGRESS BPF programs are: +++ * 0: drop packet +++ * 1: keep packet +++ * 2: drop packet and cn +++ * 3: keep packet and cn +++ * +++ * This macro then converts it to one of the NET_XMIT or an error +++ * code that is then interpreted as drop packet (and no cn): +++ * 0: NET_XMIT_SUCCESS skb should be transmitted +++ * 1: NET_XMIT_DROP skb should be dropped and cn +++ * 2: NET_XMIT_CN skb should be transmitted and cn +++ * 3: -EPERM skb should be dropped +++ */ +++#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \ +++ ({ \ +++ struct bpf_prog_array_item *_item; \ +++ struct bpf_prog *_prog; \ +++ struct bpf_prog_array *_array; \ +++ u32 ret; \ +++ u32 _ret = 1; \ +++ u32 _cn = 0; \ +++ preempt_disable(); \ +++ rcu_read_lock(); \ +++ _array = rcu_dereference(array); \ +++ _item = &_array->items[0]; \ +++ while ((_prog = READ_ONCE(_item->prog))) { \ +++ bpf_cgroup_storage_set(_item->cgroup_storage); \ +++ ret = func(_prog, ctx); \ +++ _ret &= (ret & 1); \ +++ _cn |= (ret & 2); \ +++ _item++; \ +++ } \ +++ rcu_read_unlock(); \ +++ preempt_enable(); \ +++ if (_ret) \ +++ _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \ +++ else \ +++ _ret = (_cn ? NET_XMIT_DROP : -EPERM); \ +++ _ret; \ +++ }) +++ +++#define BPF_PROG_RUN_ARRAY(array, ctx, func) \ +++ __BPF_PROG_RUN_ARRAY(array, ctx, func, false, true) +++ +++#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func) \ +++ __BPF_PROG_RUN_ARRAY(array, ctx, func, true, false) +++ ++ #ifdef CONFIG_BPF_SYSCALL ++-void bpf_register_prog_type(struct bpf_prog_type_list *tl); ++-void bpf_register_map_type(struct bpf_map_type_list *tl); +++DECLARE_PER_CPU(int, bpf_prog_active); +++ +++extern const struct file_operations bpf_map_fops; +++extern const struct file_operations bpf_prog_fops; +++ +++#define BPF_PROG_TYPE(_id, _name) \ +++ extern const struct bpf_prog_ops _name ## _prog_ops; \ +++ extern const struct bpf_verifier_ops _name ## _verifier_ops; +++#define BPF_MAP_TYPE(_id, _ops) \ +++ extern const struct bpf_map_ops _ops; +++#include +++#undef BPF_PROG_TYPE +++#undef BPF_MAP_TYPE +++ +++extern const struct bpf_prog_ops bpf_offload_prog_ops; +++extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops; +++extern const struct bpf_verifier_ops xdp_analyzer_ops; ++ ++ struct bpf_prog *bpf_prog_get(u32 ufd); ++-struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog); +++struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, +++ bool attach_drv); +++struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i); +++void bpf_prog_sub(struct bpf_prog *prog, int i); +++struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog); +++struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog); ++ void bpf_prog_put(struct bpf_prog *prog); ++-void bpf_prog_put_rcu(struct bpf_prog *prog); +++int __bpf_prog_charge(struct user_struct *user, u32 pages); +++void __bpf_prog_uncharge(struct user_struct *user, u32 pages); +++ +++void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock); +++void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); ++ ++ struct bpf_map *bpf_map_get_with_uref(u32 ufd); ++ 
struct bpf_map *__bpf_map_get(struct fd f); ++-struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref); +++struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref); +++struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map, +++ bool uref); ++ void bpf_map_put_with_uref(struct bpf_map *map); ++ void bpf_map_put(struct bpf_map *map); +++int bpf_map_charge_memlock(struct bpf_map *map, u32 pages); +++void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages); +++int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size); +++void bpf_map_charge_finish(struct bpf_map_memory *mem); +++void bpf_map_charge_move(struct bpf_map_memory *dst, +++ struct bpf_map_memory *src); +++void *bpf_map_area_alloc(u64 size, int numa_node); +++void bpf_map_area_free(void *base); +++void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); ++ ++ extern int sysctl_unprivileged_bpf_disabled; ++ ++-int bpf_map_new_fd(struct bpf_map *map); +++int bpf_map_new_fd(struct bpf_map *map, int flags); ++ int bpf_prog_new_fd(struct bpf_prog *prog); ++ ++ int bpf_obj_pin_user(u32 ufd, const char __user *pathname); ++-int bpf_obj_get_user(const char __user *pathname); +++int bpf_obj_get_user(const char __user *pathname, int flags); +++ +++int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); +++int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); +++int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, +++ u64 flags); +++int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, +++ u64 flags); +++ +++int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value); +++ +++int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, +++ void *key, void *value, u64 map_flags); +++int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); +++int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, +++ void *key, void *value, u64 map_flags); +++int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); +++ +++int bpf_get_file_flag(int flags); +++int bpf_check_uarg_tail_zero(void __user *uaddr, size_t expected_size, +++ size_t actual_size); +++ +++/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and +++ * forced to use 'long' read/writes to try to atomically copy long counters. +++ * Best-effort only. No barriers here, since it _will_ race with concurrent +++ * updates from BPF programs. Called from bpf syscall and mostly used with +++ * size 8 or 16 bytes, so ask compiler to inline it. 
+++ */ +++static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) +++{ +++ const long *lsrc = src; +++ long *ldst = dst; +++ +++ size /= sizeof(long); +++ while (size--) +++ *ldst++ = *lsrc++; +++} ++ ++ /* verify correctness of eBPF program */ ++-int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); ++-#else ++-static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl) +++int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, +++ union bpf_attr __user *uattr); +++ +++#ifndef CONFIG_BPF_JIT_ALWAYS_ON +++void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); +++#endif +++ +++/* Map specifics */ +++struct xdp_buff; +++struct sk_buff; +++ +++struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); +++struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key); +++void __dev_map_flush(struct bpf_map *map); +++int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, +++ struct net_device *dev_rx); +++int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, +++ struct bpf_prog *xdp_prog); +++ +++struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); +++void __cpu_map_flush(struct bpf_map *map); +++int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, +++ struct net_device *dev_rx); +++ +++/* Return map's numa specified by userspace */ +++static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) ++ { +++ return (attr->map_flags & BPF_F_NUMA_NODE) ? +++ attr->numa_node : NUMA_NO_NODE; ++ } ++ +++struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type); +++int array_map_alloc_check(union bpf_attr *attr); +++ +++int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, +++ union bpf_attr __user *uattr); +++int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, +++ union bpf_attr __user *uattr); +++int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, +++ const union bpf_attr *kattr, +++ union bpf_attr __user *uattr); +++#else /* !CONFIG_BPF_SYSCALL */ ++ static inline struct bpf_prog *bpf_prog_get(u32 ufd) ++ { ++ return ERR_PTR(-EOPNOTSUPP); ++ } ++ +++static inline struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, +++ enum bpf_prog_type type, +++ bool attach_drv) +++{ +++ return ERR_PTR(-EOPNOTSUPP); +++} +++ +++static inline struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, +++ int i) +++{ +++ return ERR_PTR(-EOPNOTSUPP); +++} +++ +++static inline void bpf_prog_sub(struct bpf_prog *prog, int i) +++{ +++} +++ ++ static inline void bpf_prog_put(struct bpf_prog *prog) ++ { ++ } ++ ++-static inline void bpf_prog_put_rcu(struct bpf_prog *prog) +++static inline struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog) +++{ +++ return ERR_PTR(-EOPNOTSUPP); +++} +++ +++static inline struct bpf_prog *__must_check +++bpf_prog_inc_not_zero(struct bpf_prog *prog) +++{ +++ return ERR_PTR(-EOPNOTSUPP); +++} +++ +++static inline int __bpf_prog_charge(struct user_struct *user, u32 pages) +++{ +++ return 0; +++} +++ +++static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages) +++{ +++} +++ +++static inline int bpf_obj_get_user(const char __user *pathname, int flags) +++{ +++ return -EOPNOTSUPP; +++} +++ +++static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map, +++ u32 key) +++{ +++ return NULL; +++} +++ +++static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map, +++ u32 key) 
+++{ +++ return NULL; +++} +++ +++static inline void __dev_map_flush(struct bpf_map *map) +++{ +++} +++ +++struct xdp_buff; +++struct bpf_dtab_netdev; +++ +++static inline +++int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, +++ struct net_device *dev_rx) +++{ +++ return 0; +++} +++ +++struct sk_buff; +++ +++static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, +++ struct sk_buff *skb, +++ struct bpf_prog *xdp_prog) +++{ +++ return 0; +++} +++ +++static inline +++struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) +++{ +++ return NULL; +++} +++ +++static inline void __cpu_map_flush(struct bpf_map *map) +++{ +++} +++ +++static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, +++ struct xdp_buff *xdp, +++ struct net_device *dev_rx) +++{ +++ return 0; +++} +++ +++static inline struct bpf_prog *bpf_prog_get_type_path(const char *name, +++ enum bpf_prog_type type) +++{ +++ return ERR_PTR(-EOPNOTSUPP); +++} +++ +++static inline int bpf_prog_test_run_xdp(struct bpf_prog *prog, +++ const union bpf_attr *kattr, +++ union bpf_attr __user *uattr) +++{ +++ return -ENOTSUPP; +++} +++ +++static inline int bpf_prog_test_run_skb(struct bpf_prog *prog, +++ const union bpf_attr *kattr, +++ union bpf_attr __user *uattr) +++{ +++ return -ENOTSUPP; +++} +++ +++static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, +++ const union bpf_attr *kattr, +++ union bpf_attr __user *uattr) +++{ +++ return -ENOTSUPP; +++} +++#endif /* CONFIG_BPF_SYSCALL */ +++ +++static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, +++ enum bpf_prog_type type) +++{ +++ return bpf_prog_get_type_dev(ufd, type, false); +++} +++ +++bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool); +++ +++#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) +++ +++static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux) +++{ +++ return aux->offload_requested; +++} +++ +++static inline bool bpf_map_is_dev_bound(struct bpf_map *map) +++{ +++ return false; +++} +++ +++#else +++static inline int bpf_prog_offload_init(struct bpf_prog *prog, +++ union bpf_attr *attr) +++{ +++ return -EOPNOTSUPP; +++} +++ +++static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux) +++{ +++ return false; +++} +++ +++static inline bool bpf_map_is_dev_bound(struct bpf_map *map) +++{ +++ return false; +++} +++ +++#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ +++ +++#if defined(CONFIG_BPF_STREAM_PARSER) +++int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, +++ struct bpf_prog *old, u32 which); +++int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); +++int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); +++#else +++static inline int sock_map_prog_update(struct bpf_map *map, +++ struct bpf_prog *prog, +++ struct bpf_prog *old, u32 which) ++ { +++ return -EOPNOTSUPP; +++} +++ +++static inline int sock_map_get_from_fd(const union bpf_attr *attr, +++ struct bpf_prog *prog) +++{ +++ return -EINVAL; +++} +++ +++static inline int sock_map_prog_detach(const union bpf_attr *attr, +++ enum bpf_prog_type ptype) +++{ +++ return -EOPNOTSUPP; +++} +++#endif +++ +++#if defined(CONFIG_XDP_SOCKETS) +++struct xdp_sock; +++struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key); +++int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp, +++ struct xdp_sock *xs); +++void __xsk_map_flush(struct bpf_map *map); +++#else +++struct xdp_sock; +++static inline struct xdp_sock 
*__xsk_map_lookup_elem(struct bpf_map *map, +++ u32 key) +++{ +++ return NULL; +++} +++ +++static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp, +++ struct xdp_sock *xs) +++{ +++ return -EOPNOTSUPP; +++} +++ +++static inline void __xsk_map_flush(struct bpf_map *map) +++{ +++} +++#endif +++ +++#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) +++void bpf_sk_reuseport_detach(struct sock *sk); +++int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, +++ void *value); +++int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, +++ void *value, u64 map_flags); +++#else +++static inline void bpf_sk_reuseport_detach(struct sock *sk) +++{ +++} +++ +++#ifdef CONFIG_BPF_SYSCALL +++static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, +++ void *key, void *value) +++{ +++ return -EOPNOTSUPP; +++} +++ +++static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, +++ void *key, void *value, +++ u64 map_flags) +++{ +++ return -EOPNOTSUPP; ++ } ++ #endif /* CONFIG_BPF_SYSCALL */ +++#endif /* defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) */ ++ ++ /* verifier prototypes for helper functions called from eBPF programs */ ++ extern const struct bpf_func_proto bpf_map_lookup_elem_proto; ++ extern const struct bpf_func_proto bpf_map_update_elem_proto; ++ extern const struct bpf_func_proto bpf_map_delete_elem_proto; +++extern const struct bpf_func_proto bpf_map_push_elem_proto; +++extern const struct bpf_func_proto bpf_map_pop_elem_proto; +++extern const struct bpf_func_proto bpf_map_peek_elem_proto; ++ ++ extern const struct bpf_func_proto bpf_get_prandom_u32_proto; ++ extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; +++extern const struct bpf_func_proto bpf_get_numa_node_id_proto; ++ extern const struct bpf_func_proto bpf_tail_call_proto; ++ extern const struct bpf_func_proto bpf_ktime_get_ns_proto; ++ extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; ++ extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; ++ extern const struct bpf_func_proto bpf_get_current_comm_proto; ++-extern const struct bpf_func_proto bpf_skb_vlan_push_proto; ++-extern const struct bpf_func_proto bpf_skb_vlan_pop_proto; +++extern const struct bpf_func_proto bpf_get_stackid_proto; +++extern const struct bpf_func_proto bpf_get_stack_proto; +++extern const struct bpf_func_proto bpf_sock_map_update_proto; +++extern const struct bpf_func_proto bpf_sock_hash_update_proto; +++extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto; +++extern const struct bpf_func_proto bpf_msg_redirect_hash_proto; +++extern const struct bpf_func_proto bpf_msg_redirect_map_proto; +++extern const struct bpf_func_proto bpf_sk_redirect_hash_proto; +++extern const struct bpf_func_proto bpf_sk_redirect_map_proto; +++extern const struct bpf_func_proto bpf_spin_lock_proto; +++extern const struct bpf_func_proto bpf_spin_unlock_proto; +++extern const struct bpf_func_proto bpf_get_local_storage_proto; +++extern const struct bpf_func_proto bpf_strtol_proto; +++extern const struct bpf_func_proto bpf_strtoul_proto; +++extern const struct bpf_func_proto bpf_tcp_sock_proto; ++ ++ /* Shared helpers among cBPF and eBPF. 
*/ ++ void bpf_user_rnd_init_once(void); ++ u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); ++ +++#if defined(CONFIG_NET) +++bool bpf_sock_common_is_valid_access(int off, int size, +++ enum bpf_access_type type, +++ struct bpf_insn_access_aux *info); +++bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, +++ struct bpf_insn_access_aux *info); +++u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, +++ const struct bpf_insn *si, +++ struct bpf_insn *insn_buf, +++ struct bpf_prog *prog, +++ u32 *target_size); +++#else +++static inline bool bpf_sock_common_is_valid_access(int off, int size, +++ enum bpf_access_type type, +++ struct bpf_insn_access_aux *info) +++{ +++ return false; +++} +++static inline bool bpf_sock_is_valid_access(int off, int size, +++ enum bpf_access_type type, +++ struct bpf_insn_access_aux *info) +++{ +++ return false; +++} +++static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, +++ const struct bpf_insn *si, +++ struct bpf_insn *insn_buf, +++ struct bpf_prog *prog, +++ u32 *target_size) +++{ +++ return 0; +++} +++#endif +++ +++#ifdef CONFIG_INET +++bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, +++ struct bpf_insn_access_aux *info); +++ +++u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, +++ const struct bpf_insn *si, +++ struct bpf_insn *insn_buf, +++ struct bpf_prog *prog, +++ u32 *target_size); +++ +++bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type, +++ struct bpf_insn_access_aux *info); +++ +++u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, +++ const struct bpf_insn *si, +++ struct bpf_insn *insn_buf, +++ struct bpf_prog *prog, +++ u32 *target_size); +++#else +++static inline bool bpf_tcp_sock_is_valid_access(int off, int size, +++ enum bpf_access_type type, +++ struct bpf_insn_access_aux *info) +++{ +++ return false; +++} +++ +++static inline u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, +++ const struct bpf_insn *si, +++ struct bpf_insn *insn_buf, +++ struct bpf_prog *prog, +++ u32 *target_size) +++{ +++ return 0; +++} +++static inline bool bpf_xdp_sock_is_valid_access(int off, int size, +++ enum bpf_access_type type, +++ struct bpf_insn_access_aux *info) +++{ +++ return false; +++} +++ +++static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, +++ const struct bpf_insn *si, +++ struct bpf_insn *insn_buf, +++ struct bpf_prog *prog, +++ u32 *target_size) +++{ +++ return 0; +++} +++#endif /* CONFIG_INET */ +++ ++ #endif /* _LINUX_BPF_H */ ++--- /dev/null +++++ b/include/linux/bpf_trace.h ++@@ -0,0 +1,7 @@ +++/* SPDX-License-Identifier: GPL-2.0 */ +++#ifndef __LINUX_BPF_TRACE_H__ +++#define __LINUX_BPF_TRACE_H__ +++ +++#include +++ +++#endif /* __LINUX_BPF_TRACE_H__ */ ++--- /dev/null +++++ b/include/linux/bpf_types.h ++@@ -0,0 +1,44 @@ +++/* SPDX-License-Identifier: GPL-2.0 */ +++/* internal file - do not include directly */ +++ +++#ifdef CONFIG_NET +++BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter) +++BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act) +++BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act) +++BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in) +++BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out) +++BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit) +++BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local) +++BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops) +++BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb) +++BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg) 
+++BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector) +++#endif +++#ifdef CONFIG_BPF_EVENTS +++BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) +++BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) +++BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) +++BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint) +++BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable) +++#endif +++ +++BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) +++BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) +++BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops) +++BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops) +++BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops) +++BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops) +++BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops) +++BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, htab_lru_percpu_map_ops) +++BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops) +++#ifdef CONFIG_PERF_EVENTS +++BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops) +++#endif +++BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) +++BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) +++#ifdef CONFIG_NET +++BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) +++BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops) +++#endif +++BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops) +++BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops) ++--- /dev/null +++++ b/include/linux/bpf_verifier.h ++@@ -0,0 +1,425 @@ +++/* SPDX-License-Identifier: GPL-2.0-only */ +++/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com +++ */ +++#ifndef _LINUX_BPF_VERIFIER_H +++#define _LINUX_BPF_VERIFIER_H 1 +++ +++#include /* for enum bpf_reg_type */ +++#include /* for MAX_BPF_STACK */ +++#include +++ +++/* Maximum variable offset umax_value permitted when resolving memory accesses. +++ * In practice this is far bigger than any realistic pointer offset; this limit +++ * ensures that umax_value + (int)off + (int)size cannot overflow a u64. +++ */ +++#define BPF_MAX_VAR_OFF (1 << 29) +++/* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO]. This ensures +++ * that converting umax_value to int cannot overflow. +++ */ +++#define BPF_MAX_VAR_SIZ (1 << 29) +++ +++/* Liveness marks, used for registers and spilled-regs (in stack slots). +++ * Read marks propagate upwards until they find a write mark; they record that +++ * "one of this state's descendants read this reg" (and therefore the reg is +++ * relevant for states_equal() checks). +++ * Write marks collect downwards and do not propagate; they record that "the +++ * straight-line code that reached this state (from its parent) wrote this reg" +++ * (and therefore that reads propagated from this state or its descendants +++ * should not propagate to its parent). +++ * A state with a write mark can receive read marks; it just won't propagate +++ * them to its parent, since the write mark is a property, not of the state, +++ * but of the link between it and its parent. See mark_reg_read() and +++ * mark_stack_slot_read() in kernel/bpf/verifier.c. 
+++ */ +++enum bpf_reg_liveness { +++ REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */ +++ REG_LIVE_READ32 = 0x1, /* reg was read, so we're sensitive to initial value */ +++ REG_LIVE_READ64 = 0x2, /* likewise, but full 64-bit content matters */ +++ REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64, +++ REG_LIVE_WRITTEN = 0x4, /* reg was written first, screening off later reads */ +++ REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */ +++}; +++ +++struct bpf_reg_state { +++ /* Ordering of fields matters. See states_equal() */ +++ enum bpf_reg_type type; +++ union { +++ /* valid when type == PTR_TO_PACKET */ +++ u16 range; +++ +++ /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | +++ * PTR_TO_MAP_VALUE_OR_NULL +++ */ +++ struct bpf_map *map_ptr; +++ +++ /* Max size from any of the above. */ +++ unsigned long raw; +++ }; +++ /* Fixed part of pointer offset, pointer types only */ +++ s32 off; +++ /* For PTR_TO_PACKET, used to find other pointers with the same variable +++ * offset, so they can share range knowledge. +++ * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we +++ * came from, when one is tested for != NULL. +++ * For PTR_TO_SOCKET this is used to share which pointers retain the +++ * same reference to the socket, to determine proper reference freeing. +++ */ +++ u32 id; +++ /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned +++ * from a pointer-cast helper, bpf_sk_fullsock() and +++ * bpf_tcp_sock(). +++ * +++ * Consider the following where "sk" is a reference counted +++ * pointer returned from "sk = bpf_sk_lookup_tcp();": +++ * +++ * 1: sk = bpf_sk_lookup_tcp(); +++ * 2: if (!sk) { return 0; } +++ * 3: fullsock = bpf_sk_fullsock(sk); +++ * 4: if (!fullsock) { bpf_sk_release(sk); return 0; } +++ * 5: tp = bpf_tcp_sock(fullsock); +++ * 6: if (!tp) { bpf_sk_release(sk); return 0; } +++ * 7: bpf_sk_release(sk); +++ * 8: snd_cwnd = tp->snd_cwnd; // verifier will complain +++ * +++ * After bpf_sk_release(sk) at line 7, both "fullsock" ptr and +++ * "tp" ptr should be invalidated also. In order to do that, +++ * the reg holding "fullsock" and "sk" need to remember +++ * the original refcounted ptr id (i.e. sk_reg->id) in ref_obj_id +++ * such that the verifier can reset all regs which have +++ * ref_obj_id matching the sk_reg->id. +++ * +++ * sk_reg->ref_obj_id is set to sk_reg->id at line 1. +++ * sk_reg->id will stay as NULL-marking purpose only. +++ * After NULL-marking is done, sk_reg->id can be reset to 0. +++ * +++ * After "fullsock = bpf_sk_fullsock(sk);" at line 3, +++ * fullsock_reg->ref_obj_id is set to sk_reg->ref_obj_id. +++ * +++ * After "tp = bpf_tcp_sock(fullsock);" at line 5, +++ * tp_reg->ref_obj_id is set to fullsock_reg->ref_obj_id +++ * which is the same as sk_reg->ref_obj_id. +++ * +++ * From the verifier perspective, if sk, fullsock and tp +++ * are not NULL, they are the same ptr with different +++ * reg->type. In particular, bpf_sk_release(tp) is also +++ * allowed and has the same effect as bpf_sk_release(sk). +++ */ +++ u32 ref_obj_id; +++ /* For scalar types (SCALAR_VALUE), this represents our knowledge of +++ * the actual value. +++ * For pointer types, this represents the variable part of the offset +++ * from the pointed-to object, and is shared with all bpf_reg_states +++ * with the same id as us. +++ */ +++ struct tnum var_off; +++ /* Used to determine if any memory access using this register will +++ * result in a bad access. 
+++ * These refer to the same value as var_off, not necessarily the actual +++ * contents of the register. +++ */ +++ s64 smin_value; /* minimum possible (s64)value */ +++ s64 smax_value; /* maximum possible (s64)value */ +++ u64 umin_value; /* minimum possible (u64)value */ +++ u64 umax_value; /* maximum possible (u64)value */ +++ /* parentage chain for liveness checking */ +++ struct bpf_reg_state *parent; +++ /* Inside the callee two registers can be both PTR_TO_STACK like +++ * R1=fp-8 and R2=fp-8, but one of them points to this function stack +++ * while another to the caller's stack. To differentiate them 'frameno' +++ * is used which is an index in bpf_verifier_state->frame[] array +++ * pointing to bpf_func_state. +++ */ +++ u32 frameno; +++ /* Tracks subreg definition. The stored value is the insn_idx of the +++ * writing insn. This is safe because subreg_def is used before any insn +++ * patching which only happens after main verification finished. +++ */ +++ s32 subreg_def; +++ enum bpf_reg_liveness live; +++ /* if (!precise && SCALAR_VALUE) min/max/tnum don't affect safety */ +++ bool precise; +++}; +++ +++enum bpf_stack_slot_type { +++ STACK_INVALID, /* nothing was stored in this stack slot */ +++ STACK_SPILL, /* register spilled into stack */ +++ STACK_MISC, /* BPF program wrote some data into this slot */ +++ STACK_ZERO, /* BPF program wrote constant zero */ +++}; +++ +++#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ +++ +++struct bpf_stack_state { +++ struct bpf_reg_state spilled_ptr; +++ u8 slot_type[BPF_REG_SIZE]; +++}; +++ +++struct bpf_reference_state { +++ /* Track each reference created with a unique id, even if the same +++ * instruction creates the reference multiple times (eg, via CALL). +++ */ +++ int id; +++ /* Instruction where the allocation of this reference occurred. This +++ * is used purely to inform the user of a reference leak. +++ */ +++ int insn_idx; +++}; +++ +++/* state of the program: +++ * type of all registers and stack info +++ */ +++struct bpf_func_state { +++ struct bpf_reg_state regs[MAX_BPF_REG]; +++ /* index of call instruction that called into this func */ +++ int callsite; +++ /* stack frame number of this function state from pov of +++ * enclosing bpf_verifier_state. +++ * 0 = main function, 1 = first callee. +++ */ +++ u32 frameno; +++ /* subprog number == index within subprog_stack_depth +++ * zero == main subprog +++ */ +++ u32 subprogno; +++ +++ /* The following fields should be last. See copy_func_state() */ +++ int acquired_refs; +++ struct bpf_reference_state *refs; +++ int allocated_stack; +++ struct bpf_stack_state *stack; +++}; +++ +++struct bpf_idx_pair { +++ u32 prev_idx; +++ u32 idx; +++}; +++ +++#define MAX_CALL_FRAMES 8 +++struct bpf_verifier_state { +++ /* call stack tracking */ +++ struct bpf_func_state *frame[MAX_CALL_FRAMES]; +++ struct bpf_verifier_state *parent; +++ /* +++ * 'branches' field is the number of branches left to explore: +++ * 0 - all possible paths from this state reached bpf_exit or +++ * were safely pruned +++ * 1 - at least one path is being explored. +++ * This state hasn't reached bpf_exit +++ * 2 - at least two paths are being explored. +++ * This state is an immediate parent of two children. +++ * One is fallthrough branch with branches==1 and another +++ * state is pushed into stack (to be explored later) also with +++ * branches==1. The parent of this state has branches==1. 
+++ * The verifier state tree connected via 'parent' pointer looks like: +++ * 1 +++ * 1 +++ * 2 -> 1 (first 'if' pushed into stack) +++ * 1 +++ * 2 -> 1 (second 'if' pushed into stack) +++ * 1 +++ * 1 +++ * 1 bpf_exit. +++ * +++ * Once do_check() reaches bpf_exit, it calls update_branch_counts() +++ * and the verifier state tree will look: +++ * 1 +++ * 1 +++ * 2 -> 1 (first 'if' pushed into stack) +++ * 1 +++ * 1 -> 1 (second 'if' pushed into stack) +++ * 0 +++ * 0 +++ * 0 bpf_exit. +++ * After pop_stack() the do_check() will resume at second 'if'. +++ * +++ * If is_state_visited() sees a state with branches > 0 it means +++ * there is a loop. If such state is exactly equal to the current state +++ * it's an infinite loop. Note states_equal() checks for states +++ * equvalency, so two states being 'states_equal' does not mean +++ * infinite loop. The exact comparison is provided by +++ * states_maybe_looping() function. It's a stronger pre-check and +++ * much faster than states_equal(). +++ * +++ * This algorithm may not find all possible infinite loops or +++ * loop iteration count may be too high. +++ * In such cases BPF_COMPLEXITY_LIMIT_INSNS limit kicks in. +++ */ +++ u32 branches; +++ u32 insn_idx; +++ u32 curframe; +++ u32 active_spin_lock; +++ bool speculative; +++ +++ /* first and last insn idx of this verifier state */ +++ u32 first_insn_idx; +++ u32 last_insn_idx; +++ /* jmp history recorded from first to last. +++ * backtracking is using it to go from last to first. +++ * For most states jmp_history_cnt is [0-3]. +++ * For loops can go up to ~40. +++ */ +++ struct bpf_idx_pair *jmp_history; +++ u32 jmp_history_cnt; +++}; +++ +++#define bpf_get_spilled_reg(slot, frame) \ +++ (((slot < frame->allocated_stack / BPF_REG_SIZE) && \ +++ (frame->stack[slot].slot_type[0] == STACK_SPILL)) \ +++ ? &frame->stack[slot].spilled_ptr : NULL) +++ +++/* Iterate over 'frame', setting 'reg' to either NULL or a spilled register. */ +++#define bpf_for_each_spilled_reg(iter, frame, reg) \ +++ for (iter = 0, reg = bpf_get_spilled_reg(iter, frame); \ +++ iter < frame->allocated_stack / BPF_REG_SIZE; \ +++ iter++, reg = bpf_get_spilled_reg(iter, frame)) +++ +++/* linked list of verifier states used to prune search */ +++struct bpf_verifier_state_list { +++ struct bpf_verifier_state state; +++ struct bpf_verifier_state_list *next; +++ int miss_cnt, hit_cnt; +++}; +++ +++/* Possible states for alu_state member. 
*/ +++#define BPF_ALU_SANITIZE_SRC (1U << 0) +++#define BPF_ALU_SANITIZE_DST (1U << 1) +++#define BPF_ALU_NEG_VALUE (1U << 2) +++#define BPF_ALU_NON_POINTER (1U << 3) +++#define BPF_ALU_IMMEDIATE (1U << 4) +++#define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \ +++ BPF_ALU_SANITIZE_DST) +++ +++struct bpf_insn_aux_data { +++ union { +++ enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ +++ unsigned long map_state; /* pointer/poison value for maps */ +++ s32 call_imm; /* saved imm field of call insn */ +++ u32 alu_limit; /* limit for add/sub register with pointer */ +++ struct { +++ u32 map_index; /* index into used_maps[] */ +++ u32 map_off; /* offset from value base address */ +++ }; +++ }; +++ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ +++ int sanitize_stack_off; /* stack slot to be cleared */ +++ bool seen; /* this insn was processed by the verifier */ +++ bool zext_dst; /* this insn zero extends dst reg */ +++ u8 alu_state; /* used in combination with alu_limit */ +++ bool prune_point; +++ unsigned int orig_idx; /* original instruction index */ +++}; +++ +++#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ +++ +++#define BPF_VERIFIER_TMP_LOG_SIZE 1024 +++ +++struct bpf_verifier_log { +++ u32 level; +++ char kbuf[BPF_VERIFIER_TMP_LOG_SIZE]; +++ char __user *ubuf; +++ u32 len_used; +++ u32 len_total; +++}; +++ +++static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log) +++{ +++ return log->len_used >= log->len_total - 1; +++} +++ +++#define BPF_LOG_LEVEL1 1 +++#define BPF_LOG_LEVEL2 2 +++#define BPF_LOG_STATS 4 +++#define BPF_LOG_LEVEL (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2) +++#define BPF_LOG_MASK (BPF_LOG_LEVEL | BPF_LOG_STATS) +++ +++static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) +++{ +++ return log->level && log->ubuf && !bpf_verifier_log_full(log); +++} +++ +++#define BPF_MAX_SUBPROGS 256 +++ +++struct bpf_subprog_info { +++ u32 start; /* insn idx of function entry point */ +++ u32 linfo_idx; /* The idx to the main_prog->aux->linfo */ +++ u16 stack_depth; /* max. 
stack depth used by this function */ +++ bool has_tail_call; +++}; +++ +++/* single container for all structs +++ * one verifier_env per bpf_check() call +++ */ +++struct bpf_verifier_env { +++ u32 insn_idx; +++ u32 prev_insn_idx; +++ struct bpf_prog *prog; /* eBPF program being verified */ +++ const struct bpf_verifier_ops *ops; +++ struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ +++ int stack_size; /* number of states to be processed */ +++ bool strict_alignment; /* perform strict pointer alignment checks */ +++ bool test_state_freq; /* test verifier with different pruning frequency */ +++ struct bpf_verifier_state *cur_state; /* current verifier state */ +++ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ +++ struct bpf_verifier_state_list *free_list; +++ struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ +++ u32 used_map_cnt; /* number of used maps */ +++ u32 id_gen; /* used to generate unique reg IDs */ +++ bool allow_ptr_leaks; +++ bool seen_direct_write; +++ struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ +++ const struct bpf_line_info *prev_linfo; +++ struct bpf_verifier_log log; +++ struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1]; +++ struct { +++ int *insn_state; +++ int *insn_stack; +++ int cur_stack; +++ } cfg; +++ u32 subprog_cnt; +++ /* number of instructions analyzed by the verifier */ +++ u32 prev_insn_processed, insn_processed; +++ /* number of jmps, calls, exits analyzed so far */ +++ u32 prev_jmps_processed, jmps_processed; +++ /* total verification time */ +++ u64 verification_time; +++ /* maximum number of verifier states kept in 'branching' instructions */ +++ u32 max_states_per_insn; +++ /* total number of allocated verifier states */ +++ u32 total_states; +++ /* some states are freed during program analysis. +++ * this is peak number of states. 
this number dominates kernel +++ * memory consumption during verification +++ */ +++ u32 peak_states; +++ /* longest register parentage chain walked for liveness marking */ +++ u32 longest_mark_read_walk; +++}; +++ +++__printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, +++ const char *fmt, va_list args); +++__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, +++ const char *fmt, ...); +++ +++static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) +++{ +++ struct bpf_verifier_state *cur = env->cur_state; +++ +++ return cur->frame[cur->curframe]; +++} +++ +++static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) +++{ +++ return cur_func(env)->regs; +++} +++ +++int bpf_prog_offload_verifier_prep(struct bpf_prog *prog); +++int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, +++ int insn_idx, int prev_insn_idx); +++int bpf_prog_offload_finalize(struct bpf_verifier_env *env); +++void +++bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off, +++ struct bpf_insn *insn); +++void +++bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); +++ +++#endif /* _LINUX_BPF_VERIFIER_H */ ++--- /dev/null +++++ b/include/linux/btf.h ++@@ -0,0 +1,72 @@ +++/* SPDX-License-Identifier: GPL-2.0 */ +++/* Copyright (c) 2018 Facebook */ +++ +++#ifndef _LINUX_BTF_H +++#define _LINUX_BTF_H 1 +++ +++#include +++ +++struct btf; +++struct btf_member; +++struct btf_type; +++union bpf_attr; +++ +++extern const struct file_operations btf_fops; +++ +++void btf_put(struct btf *btf); +++int btf_new_fd(const union bpf_attr *attr); +++struct btf *btf_get_by_fd(int fd); +++int btf_get_info_by_fd(const struct btf *btf, +++ const union bpf_attr *attr, +++ union bpf_attr __user *uattr); +++/* Figure out the size of a type_id. If type_id is a modifier +++ * (e.g. const), it will be resolved to find out the type with size. +++ * +++ * For example: +++ * In describing "const void *", type_id is "const" and "const" +++ * refers to "void *". The return type will be "void *". +++ * +++ * If type_id is a simple "int", then return type will be "int". +++ * +++ * @btf: struct btf object +++ * @type_id: Find out the size of type_id. The type_id of the return +++ * type is set to *type_id. +++ * @ret_size: It can be NULL. If not NULL, the size of the return +++ * type is set to *ret_size. +++ * Return: The btf_type (resolved to another type with size info if needed). +++ * NULL is returned if type_id itself does not have size info +++ * (e.g. void) or it cannot be resolved to another type that +++ * has size info. +++ * *type_id and *ret_size will not be changed in the +++ * NULL return case. 
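cur_func() and cur_regs() above are the accessors verifier code uses to reach the register file of the frame currently being analysed; a small illustrative wrapper (reg_state() here is just an example name):

/* illustrative only: state of one register in the frame under analysis */
static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
{
    return &cur_regs(env)[regno];
}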
+++ */ +++const struct btf_type *btf_type_id_size(const struct btf *btf, +++ u32 *type_id, +++ u32 *ret_size); +++void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, +++ struct seq_file *m); +++int btf_get_fd_by_id(u32 id); +++u32 btf_id(const struct btf *btf); +++bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, +++ const struct btf_member *m, +++ u32 expected_offset, u32 expected_size); +++int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); +++bool btf_type_is_void(const struct btf_type *t); +++ +++#ifdef CONFIG_BPF_SYSCALL +++const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); +++const char *btf_name_by_offset(const struct btf *btf, u32 offset); +++#else +++static inline const struct btf_type *btf_type_by_id(const struct btf *btf, +++ u32 type_id) +++{ +++ return NULL; +++} +++static inline const char *btf_name_by_offset(const struct btf *btf, +++ u32 offset) +++{ +++ return NULL; +++} +++#endif +++ +++#endif ++--- a/include/uapi/linux/bpf_common.h +++++ b/include/uapi/linux/bpf_common.h ++@@ -1,3 +1,4 @@ +++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++ #ifndef _UAPI__LINUX_BPF_COMMON_H__ ++ #define _UAPI__LINUX_BPF_COMMON_H__ ++ ++@@ -14,9 +15,10 @@ ++ ++ /* ld/ldx fields */ ++ #define BPF_SIZE(code) ((code) & 0x18) ++-#define BPF_W 0x00 ++-#define BPF_H 0x08 ++-#define BPF_B 0x10 +++#define BPF_W 0x00 /* 32-bit */ +++#define BPF_H 0x08 /* 16-bit */ +++#define BPF_B 0x10 /* 8-bit */ +++/* eBPF BPF_DW 0x18 64-bit */ ++ #define BPF_MODE(code) ((code) & 0xe0) ++ #define BPF_IMM 0x00 ++ #define BPF_ABS 0x20 ++--- a/include/uapi/linux/bpf.h +++++ b/include/uapi/linux/bpf.h ++@@ -1,3 +1,4 @@ +++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++ /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com ++ * ++ * This program is free software; you can redistribute it and/or ++@@ -13,10 +14,11 @@ ++ /* Extended instruction set based on top of classic BPF */ ++ ++ /* instruction classes */ +++#define BPF_JMP32 0x06 /* jmp mode in word width */ ++ #define BPF_ALU64 0x07 /* alu mode in double word width */ ++ ++ /* ld/ldx fields */ ++-#define BPF_DW 0x18 /* double word */ +++#define BPF_DW 0x18 /* double word (64-bit) */ ++ #define BPF_XADD 0xc0 /* exclusive add */ ++ ++ /* alu/jmp fields */ ++@@ -30,9 +32,14 @@ ++ #define BPF_FROM_LE BPF_TO_LE ++ #define BPF_FROM_BE BPF_TO_BE ++ +++/* jmp encodings */ ++ #define BPF_JNE 0x50 /* jump != */ +++#define BPF_JLT 0xa0 /* LT is unsigned, '<' */ +++#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */ ++ #define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ ++ #define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ +++#define BPF_JSLT 0xc0 /* SLT is signed, '<' */ +++#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */ ++ #define BPF_CALL 0x80 /* function call */ ++ #define BPF_EXIT 0x90 /* function return */ ++ ++@@ -63,6 +70,17 @@ struct bpf_insn { ++ __s32 imm; /* signed immediate constant */ ++ }; ++ +++/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ +++struct bpf_lpm_trie_key { +++ __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ +++ __u8 data[0]; /* Arbitrary size */ +++}; +++ +++struct bpf_cgroup_storage_key { +++ __u64 cgroup_inode_id; /* cgroup inode id */ +++ __u32 attach_type; /* program attach type */ +++}; +++ ++ /* BPF syscall commands, see bpf(2) man-page for details. 
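A hedged sketch of using btf_type_id_size() as documented above; the surrounding function and variable names are invented, and error handling is reduced to returning -EINVAL:

/* resolve a possibly-modified BTF type to one that carries size information */
static int resolve_btf_size(const struct btf *btf, u32 type_id, u32 *size)
{
    const struct btf_type *t;
    u32 id = type_id;

    t = btf_type_id_size(btf, &id, size);
    if (!t)
        return -EINVAL;    /* e.g. void: no size info and no resolvable target */
    return 0;
}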
*/ ++ enum bpf_cmd { ++ BPF_MAP_CREATE, ++@@ -73,6 +91,22 @@ enum bpf_cmd { ++ BPF_PROG_LOAD, ++ BPF_OBJ_PIN, ++ BPF_OBJ_GET, +++ BPF_PROG_ATTACH, +++ BPF_PROG_DETACH, +++ BPF_PROG_TEST_RUN, +++ BPF_PROG_GET_NEXT_ID, +++ BPF_MAP_GET_NEXT_ID, +++ BPF_PROG_GET_FD_BY_ID, +++ BPF_MAP_GET_FD_BY_ID, +++ BPF_OBJ_GET_INFO_BY_FD, +++ BPF_PROG_QUERY, +++ BPF_RAW_TRACEPOINT_OPEN, +++ BPF_BTF_LOAD, +++ BPF_BTF_GET_FD_BY_ID, +++ BPF_TASK_FD_QUERY, +++ BPF_MAP_LOOKUP_AND_DELETE_ELEM, +++ BPF_MAP_FREEZE, +++ BPF_BTF_GET_NEXT_ID, ++ }; ++ ++ enum bpf_map_type { ++@@ -81,22 +115,256 @@ enum bpf_map_type { ++ BPF_MAP_TYPE_ARRAY, ++ BPF_MAP_TYPE_PROG_ARRAY, ++ BPF_MAP_TYPE_PERF_EVENT_ARRAY, +++ BPF_MAP_TYPE_PERCPU_HASH, +++ BPF_MAP_TYPE_PERCPU_ARRAY, +++ BPF_MAP_TYPE_STACK_TRACE, +++ BPF_MAP_TYPE_CGROUP_ARRAY, +++ BPF_MAP_TYPE_LRU_HASH, +++ BPF_MAP_TYPE_LRU_PERCPU_HASH, +++ BPF_MAP_TYPE_LPM_TRIE, +++ BPF_MAP_TYPE_ARRAY_OF_MAPS, +++ BPF_MAP_TYPE_HASH_OF_MAPS, +++ BPF_MAP_TYPE_DEVMAP, +++ BPF_MAP_TYPE_SOCKMAP, +++ BPF_MAP_TYPE_CPUMAP, +++ BPF_MAP_TYPE_XSKMAP, +++ BPF_MAP_TYPE_SOCKHASH, +++ BPF_MAP_TYPE_CGROUP_STORAGE, +++ BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, +++ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, +++ BPF_MAP_TYPE_QUEUE, +++ BPF_MAP_TYPE_STACK, +++ BPF_MAP_TYPE_SK_STORAGE, +++ BPF_MAP_TYPE_DEVMAP_HASH, ++ }; ++ +++/* Note that tracing related programs such as +++ * BPF_PROG_TYPE_{KPROBE,TRACEPOINT,PERF_EVENT,RAW_TRACEPOINT} +++ * are not subject to a stable API since kernel internal data +++ * structures can change from release to release and may +++ * therefore break existing tracing BPF programs. Tracing BPF +++ * programs correspond to /a/ specific kernel which is to be +++ * analyzed, and not /a/ specific kernel /and/ all future ones. +++ */ ++ enum bpf_prog_type { ++ BPF_PROG_TYPE_UNSPEC, ++ BPF_PROG_TYPE_SOCKET_FILTER, ++ BPF_PROG_TYPE_KPROBE, ++ BPF_PROG_TYPE_SCHED_CLS, ++ BPF_PROG_TYPE_SCHED_ACT, +++ BPF_PROG_TYPE_TRACEPOINT, +++ BPF_PROG_TYPE_XDP, +++ BPF_PROG_TYPE_PERF_EVENT, +++ BPF_PROG_TYPE_CGROUP_SKB, +++ BPF_PROG_TYPE_CGROUP_SOCK, +++ BPF_PROG_TYPE_LWT_IN, +++ BPF_PROG_TYPE_LWT_OUT, +++ BPF_PROG_TYPE_LWT_XMIT, +++ BPF_PROG_TYPE_SOCK_OPS, +++ BPF_PROG_TYPE_SK_SKB, +++ BPF_PROG_TYPE_CGROUP_DEVICE, +++ BPF_PROG_TYPE_SK_MSG, +++ BPF_PROG_TYPE_RAW_TRACEPOINT, +++ BPF_PROG_TYPE_CGROUP_SOCK_ADDR, +++ BPF_PROG_TYPE_LWT_SEG6LOCAL, +++ BPF_PROG_TYPE_LIRC_MODE2, +++ BPF_PROG_TYPE_SK_REUSEPORT, +++ BPF_PROG_TYPE_FLOW_DISSECTOR, +++ BPF_PROG_TYPE_CGROUP_SYSCTL, +++ BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, +++ BPF_PROG_TYPE_CGROUP_SOCKOPT, ++ }; ++ +++enum bpf_attach_type { +++ BPF_CGROUP_INET_INGRESS, +++ BPF_CGROUP_INET_EGRESS, +++ BPF_CGROUP_INET_SOCK_CREATE, +++ BPF_CGROUP_SOCK_OPS, +++ BPF_SK_SKB_STREAM_PARSER, +++ BPF_SK_SKB_STREAM_VERDICT, +++ BPF_CGROUP_DEVICE, +++ BPF_SK_MSG_VERDICT, +++ BPF_CGROUP_INET4_BIND, +++ BPF_CGROUP_INET6_BIND, +++ BPF_CGROUP_INET4_CONNECT, +++ BPF_CGROUP_INET6_CONNECT, +++ BPF_CGROUP_INET4_POST_BIND, +++ BPF_CGROUP_INET6_POST_BIND, +++ BPF_CGROUP_UDP4_SENDMSG, +++ BPF_CGROUP_UDP6_SENDMSG, +++ BPF_LIRC_MODE2, +++ BPF_FLOW_DISSECTOR, +++ BPF_CGROUP_SYSCTL, +++ BPF_CGROUP_UDP4_RECVMSG, +++ BPF_CGROUP_UDP6_RECVMSG, +++ BPF_CGROUP_GETSOCKOPT, +++ BPF_CGROUP_SETSOCKOPT, +++ __MAX_BPF_ATTACH_TYPE +++}; +++ +++#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE +++ +++/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command +++ * +++ * NONE(default): No further bpf programs allowed in the subtree. 
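Every command in enum bpf_cmd is multiplexed through the single bpf(2) syscall. A minimal user-space wrapper (not part of this patch) is typically all that is needed to exercise them:

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* thin wrapper around the bpf(2) multiplexer syscall */
static int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, unsigned int size)
{
    return syscall(__NR_bpf, cmd, attr, size);
}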
+++ * +++ * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, +++ * the program in this cgroup yields to sub-cgroup program. +++ * +++ * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, +++ * that cgroup program gets run in addition to the program in this cgroup. +++ * +++ * Only one program is allowed to be attached to a cgroup with +++ * NONE or BPF_F_ALLOW_OVERRIDE flag. +++ * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will +++ * release old program and attach the new one. Attach flags has to match. +++ * +++ * Multiple programs are allowed to be attached to a cgroup with +++ * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order +++ * (those that were attached first, run first) +++ * The programs of sub-cgroup are executed first, then programs of +++ * this cgroup and then programs of parent cgroup. +++ * When children program makes decision (like picking TCP CA or sock bind) +++ * parent program has a chance to override it. +++ * +++ * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. +++ * A cgroup with NONE doesn't allow any programs in sub-cgroups. +++ * Ex1: +++ * cgrp1 (MULTI progs A, B) -> +++ * cgrp2 (OVERRIDE prog C) -> +++ * cgrp3 (MULTI prog D) -> +++ * cgrp4 (OVERRIDE prog E) -> +++ * cgrp5 (NONE prog F) +++ * the event in cgrp5 triggers execution of F,D,A,B in that order. +++ * if prog F is detached, the execution is E,D,A,B +++ * if prog F and D are detached, the execution is E,A,B +++ * if prog F, E and D are detached, the execution is C,A,B +++ * +++ * All eligible programs are executed regardless of return code from +++ * earlier programs. +++ */ +++#define BPF_F_ALLOW_OVERRIDE (1U << 0) +++#define BPF_F_ALLOW_MULTI (1U << 1) +++ +++/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the +++ * verifier will perform strict alignment checking as if the kernel +++ * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set, +++ * and NET_IP_ALIGN defined to 2. +++ */ +++#define BPF_F_STRICT_ALIGNMENT (1U << 0) +++ +++/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the +++ * verifier will allow any alignment whatsoever. On platforms +++ * with strict alignment requirements for loads ands stores (such +++ * as sparc and mips) the verifier validates that all loads and +++ * stores provably follow this requirement. This flag turns that +++ * checking and enforcement off. +++ * +++ * It is mostly used for testing when we want to validate the +++ * context and memory access aspects of the verifier, but because +++ * of an unaligned access the alignment check would trigger before +++ * the one we are interested in. +++ */ +++#define BPF_F_ANY_ALIGNMENT (1U << 1) +++ +++/* BPF_F_TEST_RND_HI32 is used in BPF_PROG_LOAD command for testing purpose. +++ * Verifier does sub-register def/use analysis and identifies instructions whose +++ * def only matters for low 32-bit, high 32-bit is never referenced later +++ * through implicit zero extension. Therefore verifier notifies JIT back-ends +++ * that it is safe to ignore clearing high 32-bit for these instructions. This +++ * saves some back-ends a lot of code-gen. However such optimization is not +++ * necessary on some arches, for example x86_64, arm64 etc, whose JIT back-ends +++ * hence hasn't used verifier's analysis result. But, we really want to have a +++ * way to be able to verify the correctness of the described optimization on +++ * x86_64 on which testsuites are frequently exercised. 
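For illustration, attaching an already-loaded program to a cgroup with BPF_F_ALLOW_MULTI goes through BPF_PROG_ATTACH and the anonymous attach struct added to union bpf_attr further down; sys_bpf() is the wrapper sketched earlier and both fds are assumed to be valid:

/* attach an egress program to a cgroup, allowing additional programs below it */
static int attach_cgroup_egress(int cgroup_fd, int prog_fd)
{
    union bpf_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.target_fd     = cgroup_fd;
    attr.attach_bpf_fd = prog_fd;
    attr.attach_type   = BPF_CGROUP_INET_EGRESS;
    attr.attach_flags  = BPF_F_ALLOW_MULTI;

    return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
}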
+++ * +++ * So, this flag is introduced. Once it is set, verifier will randomize high +++ * 32-bit for those instructions who has been identified as safe to ignore them. +++ * Then, if verifier is not doing correct analysis, such randomization will +++ * regress tests to expose bugs. +++ */ +++#define BPF_F_TEST_RND_HI32 (1U << 2) +++ +++/* The verifier internal test flag. Behavior is undefined */ +++#define BPF_F_TEST_STATE_FREQ (1U << 3) +++ +++/* When BPF ldimm64's insn[0].src_reg != 0 then this can have +++ * two extensions: +++ * +++ * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE +++ * insn[0].imm: map fd map fd +++ * insn[1].imm: 0 offset into value +++ * insn[0].off: 0 0 +++ * insn[1].off: 0 0 +++ * ldimm64 rewrite: address of map address of map[0]+offset +++ * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE +++ */ ++ #define BPF_PSEUDO_MAP_FD 1 +++#define BPF_PSEUDO_MAP_VALUE 2 +++ +++/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative +++ * offset to another bpf function +++ */ +++#define BPF_PSEUDO_CALL 1 ++ ++ /* flags for BPF_MAP_UPDATE_ELEM command */ ++ #define BPF_ANY 0 /* create new element or update existing */ ++ #define BPF_NOEXIST 1 /* create new element if it didn't exist */ ++ #define BPF_EXIST 2 /* update existing element */ +++#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */ +++ +++/* flags for BPF_MAP_CREATE command */ +++#define BPF_F_NO_PREALLOC (1U << 0) +++/* Instead of having one common LRU list in the +++ * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list +++ * which can scale and perform better. +++ * Note, the LRU nodes (including free nodes) cannot be moved +++ * across different LRU lists. +++ */ +++#define BPF_F_NO_COMMON_LRU (1U << 1) +++/* Specify numa node during map creation */ +++#define BPF_F_NUMA_NODE (1U << 2) +++ +++#define BPF_OBJ_NAME_LEN 16U +++ +++/* Flags for accessing BPF object from syscall side. */ +++#define BPF_F_RDONLY (1U << 3) +++#define BPF_F_WRONLY (1U << 4) +++ +++/* Flag for stack_map, store build_id+offset instead of pointer */ +++#define BPF_F_STACK_BUILD_ID (1U << 5) +++ +++/* Zero-initialize hash function seed. This should only be used for testing. */ +++#define BPF_F_ZERO_SEED (1U << 6) +++ +++/* Flags for accessing BPF object from program side. */ +++#define BPF_F_RDONLY_PROG (1U << 7) +++#define BPF_F_WRONLY_PROG (1U << 8) +++ +++/* Clone map from listener for newly accepted socket */ +++#define BPF_F_CLONE (1U << 9) +++ +++/* flags for BPF_PROG_QUERY */ +++#define BPF_F_QUERY_EFFECTIVE (1U << 0) +++ +++enum bpf_stack_build_id_status { +++ /* user space need an empty entry to identify end of a trace */ +++ BPF_STACK_BUILD_ID_EMPTY = 0, +++ /* with valid build_id and offset */ +++ BPF_STACK_BUILD_ID_VALID = 1, +++ /* couldn't get build_id, fallback to ip */ +++ BPF_STACK_BUILD_ID_IP = 2, +++}; +++ +++#define BPF_BUILD_ID_SIZE 20 +++struct bpf_stack_build_id { +++ __s32 status; +++ unsigned char build_id[BPF_BUILD_ID_SIZE]; +++ union { +++ __u64 offset; +++ __u64 ip; +++ }; +++}; ++ ++ union bpf_attr { ++ struct { /* anonymous struct used by BPF_MAP_CREATE command */ ++@@ -104,6 +372,18 @@ union bpf_attr { ++ __u32 key_size; /* size of key in bytes */ ++ __u32 value_size; /* size of value in bytes */ ++ __u32 max_entries; /* max number of entries in a map */ +++ __u32 map_flags; /* BPF_MAP_CREATE related +++ * flags defined above. 
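The ldimm64 table above can be emitted by hand when building programs without a loader library; a hedged sketch (emit_ld_map_fd() is an invented helper, map_fd is assumed valid):

/* load the address of a map into R1 using the two-instruction ldimm64 form */
static void emit_ld_map_fd(struct bpf_insn *insn, int map_fd)
{
    insn[0] = (struct bpf_insn) {
        .code    = BPF_LD | BPF_DW | BPF_IMM,
        .dst_reg = BPF_REG_1,
        .src_reg = BPF_PSEUDO_MAP_FD,
        .imm     = map_fd,
    };
    /* second half of the 64-bit immediate is zero for BPF_PSEUDO_MAP_FD */
    insn[1] = (struct bpf_insn) { .imm = 0 };
}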
+++ */ +++ __u32 inner_map_fd; /* fd pointing to the inner map */ +++ __u32 numa_node; /* numa node (effective only if +++ * BPF_F_NUMA_NODE is set). +++ */ +++ char map_name[BPF_OBJ_NAME_LEN]; +++ __u32 map_ifindex; /* ifindex of netdev to create on */ +++ __u32 btf_fd; /* fd pointing to a BTF type data */ +++ __u32 btf_key_type_id; /* BTF type_id of the key */ +++ __u32 btf_value_type_id; /* BTF type_id of the value */ ++ }; ++ ++ struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ ++@@ -124,154 +404,2568 @@ union bpf_attr { ++ __u32 log_level; /* verbosity level of verifier */ ++ __u32 log_size; /* size of user buffer */ ++ __aligned_u64 log_buf; /* user supplied buffer */ ++- __u32 kern_version; /* checked when prog_type=kprobe */ +++ __u32 kern_version; /* not used */ +++ __u32 prog_flags; +++ char prog_name[BPF_OBJ_NAME_LEN]; +++ __u32 prog_ifindex; /* ifindex of netdev to prep for */ +++ /* For some prog types expected attach type must be known at +++ * load time to verify attach type specific parts of prog +++ * (context accesses, allowed helpers, etc). +++ */ +++ __u32 expected_attach_type; +++ __u32 prog_btf_fd; /* fd pointing to BTF type data */ +++ __u32 func_info_rec_size; /* userspace bpf_func_info size */ +++ __aligned_u64 func_info; /* func info */ +++ __u32 func_info_cnt; /* number of bpf_func_info records */ +++ __u32 line_info_rec_size; /* userspace bpf_line_info size */ +++ __aligned_u64 line_info; /* line info */ +++ __u32 line_info_cnt; /* number of bpf_line_info records */ ++ }; ++ ++ struct { /* anonymous struct used by BPF_OBJ_* commands */ ++ __aligned_u64 pathname; ++ __u32 bpf_fd; +++ __u32 file_flags; +++ }; +++ +++ struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ +++ __u32 target_fd; /* container object to attach to */ +++ __u32 attach_bpf_fd; /* eBPF program to attach */ +++ __u32 attach_type; +++ __u32 attach_flags; +++ }; +++ +++ struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ +++ __u32 prog_fd; +++ __u32 retval; +++ __u32 data_size_in; /* input: len of data_in */ +++ __u32 data_size_out; /* input/output: len of data_out +++ * returns ENOSPC if data_out +++ * is too small. +++ */ +++ __aligned_u64 data_in; +++ __aligned_u64 data_out; +++ __u32 repeat; +++ __u32 duration; +++ __u32 ctx_size_in; /* input: len of ctx_in */ +++ __u32 ctx_size_out; /* input/output: len of ctx_out +++ * returns ENOSPC if ctx_out +++ * is too small. 
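Putting the BPF_MAP_CREATE attributes above together, a user-space sketch that creates a named hash map with preallocation disabled (sys_bpf() as sketched earlier; sizes and the map name are examples):

/* create a small hash map: __u32 key -> __u64 value, no preallocation */
static int create_example_map(void)
{
    union bpf_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.map_type    = BPF_MAP_TYPE_HASH;
    attr.key_size    = sizeof(__u32);
    attr.value_size  = sizeof(__u64);
    attr.max_entries = 1024;
    attr.map_flags   = BPF_F_NO_PREALLOC;
    strncpy(attr.map_name, "example_map", sizeof(attr.map_name) - 1);

    return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}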
+++ */ +++ __aligned_u64 ctx_in; +++ __aligned_u64 ctx_out; +++ } test; +++ +++ struct { /* anonymous struct used by BPF_*_GET_*_ID */ +++ union { +++ __u32 start_id; +++ __u32 prog_id; +++ __u32 map_id; +++ __u32 btf_id; +++ }; +++ __u32 next_id; +++ __u32 open_flags; ++ }; +++ +++ struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ +++ __u32 bpf_fd; +++ __u32 info_len; +++ __aligned_u64 info; +++ } info; +++ +++ struct { /* anonymous struct used by BPF_PROG_QUERY command */ +++ __u32 target_fd; /* container object to query */ +++ __u32 attach_type; +++ __u32 query_flags; +++ __u32 attach_flags; +++ __aligned_u64 prog_ids; +++ __u32 prog_cnt; +++ } query; +++ +++ struct { +++ __u64 name; +++ __u32 prog_fd; +++ } raw_tracepoint; +++ +++ struct { /* anonymous struct for BPF_BTF_LOAD */ +++ __aligned_u64 btf; +++ __aligned_u64 btf_log_buf; +++ __u32 btf_size; +++ __u32 btf_log_size; +++ __u32 btf_log_level; +++ }; +++ +++ struct { +++ __u32 pid; /* input: pid */ +++ __u32 fd; /* input: fd */ +++ __u32 flags; /* input: flags */ +++ __u32 buf_len; /* input/output: buf len */ +++ __aligned_u64 buf; /* input/output: +++ * tp_name for tracepoint +++ * symbol for kprobe +++ * filename for uprobe +++ */ +++ __u32 prog_id; /* output: prod_id */ +++ __u32 fd_type; /* output: BPF_FD_TYPE_* */ +++ __u64 probe_offset; /* output: probe_offset */ +++ __u64 probe_addr; /* output: probe_addr */ +++ } task_fd_query; ++ } __attribute__((aligned(8))); ++ +++/* The description below is an attempt at providing documentation to eBPF +++ * developers about the multiple available eBPF helper functions. It can be +++ * parsed and used to produce a manual page. The workflow is the following, +++ * and requires the rst2man utility: +++ * +++ * $ ./scripts/bpf_helpers_doc.py \ +++ * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst +++ * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7 +++ * $ man /tmp/bpf-helpers.7 +++ * +++ * Note that in order to produce this external documentation, some RST +++ * formatting is used in the descriptions to get "bold" and "italics" in +++ * manual pages. Also note that the few trailing white spaces are +++ * intentional, removing them would break paragraphs for rst2man. +++ * +++ * Start of BPF helper function descriptions: +++ * +++ * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key) +++ * Description +++ * Perform a lookup in *map* for an entry associated to *key*. +++ * Return +++ * Map value associated to *key*, or **NULL** if no entry was +++ * found. +++ * +++ * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) +++ * Description +++ * Add or update the value of the entry associated to *key* in +++ * *map* with *value*. *flags* is one of: +++ * +++ * **BPF_NOEXIST** +++ * The entry for *key* must not exist in the map. +++ * **BPF_EXIST** +++ * The entry for *key* must already exist in the map. +++ * **BPF_ANY** +++ * No condition on the existence of the entry for *key*. +++ * +++ * Flag value **BPF_NOEXIST** cannot be used for maps of types +++ * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all +++ * elements always exist), the helper would return an error. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_map_delete_elem(struct bpf_map *map, const void *key) +++ * Description +++ * Delete entry with *key* from *map*. +++ * Return +++ * 0 on success, or a negative error in case of failure. 
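The *_GET_NEXT_ID and *_GET_FD_BY_ID commands above are normally used together to enumerate loaded objects; a hedged user-space sketch using the sys_bpf() wrapper from earlier:

/* walk the IDs of all loaded programs and open a file descriptor for each */
static void walk_prog_ids(void)
{
    union bpf_attr attr;
    __u32 id = 0;
    int fd;

    for (;;) {
        memset(&attr, 0, sizeof(attr));
        attr.start_id = id;
        if (sys_bpf(BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr)))
            break;                 /* no more programs */
        id = attr.next_id;

        memset(&attr, 0, sizeof(attr));
        attr.prog_id = id;
        fd = sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
        if (fd < 0)
            continue;
        /* e.g. query details with BPF_OBJ_GET_INFO_BY_FD here */
        close(fd);
    }
}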
+++ * +++ * int bpf_probe_read(void *dst, u32 size, const void *src) +++ * Description +++ * For tracing programs, safely attempt to read *size* bytes from +++ * address *src* and store the data in *dst*. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * u64 bpf_ktime_get_ns(void) +++ * Description +++ * Return the time elapsed since system boot, in nanoseconds. +++ * Return +++ * Current *ktime*. +++ * +++ * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...) +++ * Description +++ * This helper is a "printk()-like" facility for debugging. It +++ * prints a message defined by format *fmt* (of size *fmt_size*) +++ * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if +++ * available. It can take up to three additional **u64** +++ * arguments (as an eBPF helpers, the total number of arguments is +++ * limited to five). +++ * +++ * Each time the helper is called, it appends a line to the trace. +++ * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is +++ * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this. +++ * The format of the trace is customizable, and the exact output +++ * one will get depends on the options set in +++ * *\/sys/kernel/debug/tracing/trace_options* (see also the +++ * *README* file under the same directory). However, it usually +++ * defaults to something like: +++ * +++ * :: +++ * +++ * telnet-470 [001] .N.. 419421.045894: 0x00000001: +++ * +++ * In the above: +++ * +++ * * ``telnet`` is the name of the current task. +++ * * ``470`` is the PID of the current task. +++ * * ``001`` is the CPU number on which the task is +++ * running. +++ * * In ``.N..``, each character refers to a set of +++ * options (whether irqs are enabled, scheduling +++ * options, whether hard/softirqs are running, level of +++ * preempt_disabled respectively). **N** means that +++ * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED** +++ * are set. +++ * * ``419421.045894`` is a timestamp. +++ * * ``0x00000001`` is a fake value used by BPF for the +++ * instruction pointer register. +++ * * ```` is the message formatted with +++ * *fmt*. +++ * +++ * The conversion specifiers supported by *fmt* are similar, but +++ * more limited than for printk(). They are **%d**, **%i**, +++ * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**, +++ * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size +++ * of field, padding with zeroes, etc.) is available, and the +++ * helper will return **-EINVAL** (but print nothing) if it +++ * encounters an unknown specifier. +++ * +++ * Also, note that **bpf_trace_printk**\ () is slow, and should +++ * only be used for debugging purposes. For this reason, a notice +++ * bloc (spanning several lines) is printed to kernel logs and +++ * states that the helper should not be used "for production use" +++ * the first time this helper is used (or more precisely, when +++ * **trace_printk**\ () buffers are allocated). For passing values +++ * to user space, perf events should be preferred. +++ * Return +++ * The number of bytes written to the buffer, or a negative error +++ * in case of failure. +++ * +++ * u32 bpf_get_prandom_u32(void) +++ * Description +++ * Get a pseudo-random number. +++ * +++ * From a security point of view, this helper uses its own +++ * pseudo-random internal state, and cannot be used to infer the +++ * seed of other random functions in the kernel. However, it is +++ * essential to note that the generator used by the helper is not +++ * cryptographically secure. 
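From program context, the map helpers documented above are reached through function pointers fixed to their helper IDs, as in the classic kernel samples; the BPF_FUNC_* constants come from enum bpf_func_id later in this header, and the per-key counter is only an example:

static void *(*bpf_map_lookup_elem)(void *map, const void *key) =
    (void *) BPF_FUNC_map_lookup_elem;
static int (*bpf_map_update_elem)(void *map, const void *key, const void *value,
                                  unsigned long long flags) =
    (void *) BPF_FUNC_map_update_elem;

/* bump a per-key counter, creating the entry on first use */
static void count_event(void *counters, __u32 key)
{
    __u64 one = 1, *val;

    val = bpf_map_lookup_elem(counters, &key);
    if (val)
        __sync_fetch_and_add(val, 1);
    else
        bpf_map_update_elem(counters, &key, &one, BPF_NOEXIST);
}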
+++ * Return +++ * A random 32-bit unsigned value. +++ * +++ * u32 bpf_get_smp_processor_id(void) +++ * Description +++ * Get the SMP (symmetric multiprocessing) processor id. Note that +++ * all programs run with preemption disabled, which means that the +++ * SMP processor id is stable during all the execution of the +++ * program. +++ * Return +++ * The SMP id of the processor running the program. +++ * +++ * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) +++ * Description +++ * Store *len* bytes from address *from* into the packet +++ * associated to *skb*, at *offset*. *flags* are a combination of +++ * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the +++ * checksum for the packet after storing the bytes) and +++ * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ +++ * **->swhash** and *skb*\ **->l4hash** to 0). +++ * +++ * A call to this helper is susceptible to change the underlying +++ * packet buffer. Therefore, at load time, all checks on pointers +++ * previously done by the verifier are invalidated and must be +++ * performed again, if the helper is used in combination with +++ * direct packet access. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) +++ * Description +++ * Recompute the layer 3 (e.g. IP) checksum for the packet +++ * associated to *skb*. Computation is incremental, so the helper +++ * must know the former value of the header field that was +++ * modified (*from*), the new value of this field (*to*), and the +++ * number of bytes (2 or 4) for this field, stored in *size*. +++ * Alternatively, it is possible to store the difference between +++ * the previous and the new values of the header field in *to*, by +++ * setting *from* and *size* to 0. For both methods, *offset* +++ * indicates the location of the IP checksum within the packet. +++ * +++ * This helper works in combination with **bpf_csum_diff**\ (), +++ * which does not update the checksum in-place, but offers more +++ * flexibility and can handle sizes larger than 2 or 4 for the +++ * checksum to update. +++ * +++ * A call to this helper is susceptible to change the underlying +++ * packet buffer. Therefore, at load time, all checks on pointers +++ * previously done by the verifier are invalidated and must be +++ * performed again, if the helper is used in combination with +++ * direct packet access. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) +++ * Description +++ * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the +++ * packet associated to *skb*. Computation is incremental, so the +++ * helper must know the former value of the header field that was +++ * modified (*from*), the new value of this field (*to*), and the +++ * number of bytes (2 or 4) for this field, stored on the lowest +++ * four bits of *flags*. Alternatively, it is possible to store +++ * the difference between the previous and the new values of the +++ * header field in *to*, by setting *from* and the four lowest +++ * bits of *flags* to 0. For both methods, *offset* indicates the +++ * location of the IP checksum within the packet. In addition to +++ * the size of the field, *flags* can be added (bitwise OR) actual +++ * flags. 
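To make the bpf_trace_printk() helper documented above concrete, the usual debugging pattern keeps the format string on the program stack and passes its size explicitly (helper stub as in the kernel samples; BPF_FUNC_trace_printk comes from enum bpf_func_id later in this header):

static int (*bpf_trace_printk)(const char *fmt, __u32 fmt_size, ...) =
    (void *) BPF_FUNC_trace_printk;

/* debug print, readable via /sys/kernel/debug/tracing/trace_pipe */
static void debug_value(long v)
{
    char fmt[] = "value: %ld\n";

    bpf_trace_printk(fmt, sizeof(fmt), v);
}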
With **BPF_F_MARK_MANGLED_0**, a null checksum is left +++ * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and +++ * for updates resulting in a null checksum the value is set to +++ * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates +++ * the checksum is to be computed against a pseudo-header. +++ * +++ * This helper works in combination with **bpf_csum_diff**\ (), +++ * which does not update the checksum in-place, but offers more +++ * flexibility and can handle sizes larger than 2 or 4 for the +++ * checksum to update. +++ * +++ * A call to this helper is susceptible to change the underlying +++ * packet buffer. Therefore, at load time, all checks on pointers +++ * previously done by the verifier are invalidated and must be +++ * performed again, if the helper is used in combination with +++ * direct packet access. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) +++ * Description +++ * This special helper is used to trigger a "tail call", or in +++ * other words, to jump into another eBPF program. The same stack +++ * frame is used (but values on stack and in registers for the +++ * caller are not accessible to the callee). This mechanism allows +++ * for program chaining, either for raising the maximum number of +++ * available eBPF instructions, or to execute given programs in +++ * conditional blocks. For security reasons, there is an upper +++ * limit to the number of successive tail calls that can be +++ * performed. +++ * +++ * Upon call of this helper, the program attempts to jump into a +++ * program referenced at index *index* in *prog_array_map*, a +++ * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes +++ * *ctx*, a pointer to the context. +++ * +++ * If the call succeeds, the kernel immediately runs the first +++ * instruction of the new program. This is not a function call, +++ * and it never returns to the previous program. If the call +++ * fails, then the helper has no effect, and the caller continues +++ * to run its subsequent instructions. A call can fail if the +++ * destination program for the jump does not exist (i.e. *index* +++ * is superior to the number of entries in *prog_array_map*), or +++ * if the maximum number of tail calls has been reached for this +++ * chain of programs. This limit is defined in the kernel by the +++ * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), +++ * which is currently set to 32. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) +++ * Description +++ * Clone and redirect the packet associated to *skb* to another +++ * net device of index *ifindex*. Both ingress and egress +++ * interfaces can be used for redirection. The **BPF_F_INGRESS** +++ * value in *flags* is used to make the distinction (ingress path +++ * is selected if the flag is present, egress path otherwise). +++ * This is the only flag supported for now. +++ * +++ * In comparison with **bpf_redirect**\ () helper, +++ * **bpf_clone_redirect**\ () has the associated cost of +++ * duplicating the packet buffer, but this can be executed out of +++ * the eBPF program. Conversely, **bpf_redirect**\ () is more +++ * efficient, but it is handled through an action code where the +++ * redirection happens only after the eBPF program has returned. 
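The fall-through behaviour of bpf_tail_call() described above leads to the usual dispatch pattern below (a sketch; jmp_table is assumed to be a BPF_MAP_TYPE_PROG_ARRAY map defined elsewhere, and the helper stub follows the kernel-samples idiom):

static void (*bpf_tail_call)(void *ctx, void *prog_array_map, __u32 index) =
    (void *) BPF_FUNC_tail_call;

/* jump to the program stored at 'index'; execution continues here only on failure */
static int dispatch(void *ctx, void *jmp_table, __u32 index)
{
    bpf_tail_call(ctx, jmp_table, index);

    /* reached only if the tail call failed: bad index, empty slot, or limit hit */
    return 0;
}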
+++ * +++ * A call to this helper is susceptible to change the underlying +++ * packet buffer. Therefore, at load time, all checks on pointers +++ * previously done by the verifier are invalidated and must be +++ * performed again, if the helper is used in combination with +++ * direct packet access. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * u64 bpf_get_current_pid_tgid(void) +++ * Return +++ * A 64-bit integer containing the current tgid and pid, and +++ * created as such: +++ * *current_task*\ **->tgid << 32 \|** +++ * *current_task*\ **->pid**. +++ * +++ * u64 bpf_get_current_uid_gid(void) +++ * Return +++ * A 64-bit integer containing the current GID and UID, and +++ * created as such: *current_gid* **<< 32 \|** *current_uid*. +++ * +++ * int bpf_get_current_comm(char *buf, u32 size_of_buf) +++ * Description +++ * Copy the **comm** attribute of the current task into *buf* of +++ * *size_of_buf*. The **comm** attribute contains the name of +++ * the executable (excluding the path) for the current task. The +++ * *size_of_buf* must be strictly positive. On success, the +++ * helper makes sure that the *buf* is NUL-terminated. On failure, +++ * it is filled with zeroes. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * u32 bpf_get_cgroup_classid(struct sk_buff *skb) +++ * Description +++ * Retrieve the classid for the current task, i.e. for the net_cls +++ * cgroup to which *skb* belongs. +++ * +++ * This helper can be used on TC egress path, but not on ingress. +++ * +++ * The net_cls cgroup provides an interface to tag network packets +++ * based on a user-provided identifier for all traffic coming from +++ * the tasks belonging to the related cgroup. See also the related +++ * kernel documentation, available from the Linux sources in file +++ * *Documentation/admin-guide/cgroup-v1/net_cls.rst*. +++ * +++ * The Linux kernel has two versions for cgroups: there are +++ * cgroups v1 and cgroups v2. Both are available to users, who can +++ * use a mixture of them, but note that the net_cls cgroup is for +++ * cgroup v1 only. This makes it incompatible with BPF programs +++ * run on cgroups, which is a cgroup-v2-only feature (a socket can +++ * only hold data for one version of cgroups at a time). +++ * +++ * This helper is only available is the kernel was compiled with +++ * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to +++ * "**y**" or to "**m**". +++ * Return +++ * The classid, or 0 for the default unconfigured classid. +++ * +++ * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) +++ * Description +++ * Push a *vlan_tci* (VLAN tag control information) of protocol +++ * *vlan_proto* to the packet associated to *skb*, then update +++ * the checksum. Note that if *vlan_proto* is different from +++ * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to +++ * be **ETH_P_8021Q**. +++ * +++ * A call to this helper is susceptible to change the underlying +++ * packet buffer. Therefore, at load time, all checks on pointers +++ * previously done by the verifier are invalidated and must be +++ * performed again, if the helper is used in combination with +++ * direct packet access. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_skb_vlan_pop(struct sk_buff *skb) +++ * Description +++ * Pop a VLAN header from the packet associated to *skb*. +++ * +++ * A call to this helper is susceptible to change the underlying +++ * packet buffer. 
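The packing described above for bpf_get_current_pid_tgid() is undone with two shifts; a small sketch (helper stub as in the kernel samples):

static __u64 (*bpf_get_current_pid_tgid)(void) =
    (void *) BPF_FUNC_get_current_pid_tgid;

/* split the combined value: tgid in the upper 32 bits, pid in the lower */
static void current_ids(__u32 *tgid, __u32 *pid)
{
    __u64 v = bpf_get_current_pid_tgid();

    *tgid = v >> 32;
    *pid  = (__u32)v;
}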
Therefore, at load time, all checks on pointers +++ * previously done by the verifier are invalidated and must be +++ * performed again, if the helper is used in combination with +++ * direct packet access. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) +++ * Description +++ * Get tunnel metadata. This helper takes a pointer *key* to an +++ * empty **struct bpf_tunnel_key** of **size**, that will be +++ * filled with tunnel metadata for the packet associated to *skb*. +++ * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which +++ * indicates that the tunnel is based on IPv6 protocol instead of +++ * IPv4. +++ * +++ * The **struct bpf_tunnel_key** is an object that generalizes the +++ * principal parameters used by various tunneling protocols into a +++ * single struct. This way, it can be used to easily make a +++ * decision based on the contents of the encapsulation header, +++ * "summarized" in this struct. In particular, it holds the IP +++ * address of the remote end (IPv4 or IPv6, depending on the case) +++ * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also, +++ * this struct exposes the *key*\ **->tunnel_id**, which is +++ * generally mapped to a VNI (Virtual Network Identifier), making +++ * it programmable together with the **bpf_skb_set_tunnel_key**\ +++ * () helper. +++ * +++ * Let's imagine that the following code is part of a program +++ * attached to the TC ingress interface, on one end of a GRE +++ * tunnel, and is supposed to filter out all messages coming from +++ * remote ends with IPv4 address other than 10.0.0.1: +++ * +++ * :: +++ * +++ * int ret; +++ * struct bpf_tunnel_key key = {}; +++ * +++ * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); +++ * if (ret < 0) +++ * return TC_ACT_SHOT; // drop packet +++ * +++ * if (key.remote_ipv4 != 0x0a000001) +++ * return TC_ACT_SHOT; // drop packet +++ * +++ * return TC_ACT_OK; // accept packet +++ * +++ * This interface can also be used with all encapsulation devices +++ * that can operate in "collect metadata" mode: instead of having +++ * one network device per specific configuration, the "collect +++ * metadata" mode only requires a single device where the +++ * configuration can be extracted from this helper. +++ * +++ * This can be used together with various tunnels such as VXLan, +++ * Geneve, GRE or IP in IP (IPIP). +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) +++ * Description +++ * Populate tunnel metadata for packet associated to *skb.* The +++ * tunnel metadata is set to the contents of *key*, of *size*. The +++ * *flags* can be set to a combination of the following values: +++ * +++ * **BPF_F_TUNINFO_IPV6** +++ * Indicate that the tunnel is based on IPv6 protocol +++ * instead of IPv4. +++ * **BPF_F_ZERO_CSUM_TX** +++ * For IPv4 packets, add a flag to tunnel metadata +++ * indicating that checksum computation should be skipped +++ * and checksum set to zeroes. +++ * **BPF_F_DONT_FRAGMENT** +++ * Add a flag to tunnel metadata indicating that the +++ * packet should not be fragmented. +++ * **BPF_F_SEQ_NUMBER** +++ * Add a flag to tunnel metadata indicating that a +++ * sequence number should be added to tunnel header before +++ * sending the packet. 
This flag was added for GRE +++ * encapsulation, but might be used with other protocols +++ * as well in the future. +++ * +++ * Here is a typical usage on the transmit path: +++ * +++ * :: +++ * +++ * struct bpf_tunnel_key key; +++ * populate key ... +++ * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); +++ * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0); +++ * +++ * See also the description of the **bpf_skb_get_tunnel_key**\ () +++ * helper for additional information. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags) +++ * Description +++ * Read the value of a perf event counter. This helper relies on a +++ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of +++ * the perf event counter is selected when *map* is updated with +++ * perf event file descriptors. The *map* is an array whose size +++ * is the number of available CPUs, and each cell contains a value +++ * relative to one CPU. The value to retrieve is indicated by +++ * *flags*, that contains the index of the CPU to look up, masked +++ * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to +++ * **BPF_F_CURRENT_CPU** to indicate that the value for the +++ * current CPU should be retrieved. +++ * +++ * Note that before Linux 4.13, only hardware perf event can be +++ * retrieved. +++ * +++ * Also, be aware that the newer helper +++ * **bpf_perf_event_read_value**\ () is recommended over +++ * **bpf_perf_event_read**\ () in general. The latter has some ABI +++ * quirks where error and counter value are used as a return code +++ * (which is wrong to do since ranges may overlap). This issue is +++ * fixed with **bpf_perf_event_read_value**\ (), which at the same +++ * time provides more features over the **bpf_perf_event_read**\ +++ * () interface. Please refer to the description of +++ * **bpf_perf_event_read_value**\ () for details. +++ * Return +++ * The value of the perf event counter read from the map, or a +++ * negative error code in case of failure. +++ * +++ * int bpf_redirect(u32 ifindex, u64 flags) +++ * Description +++ * Redirect the packet to another net device of index *ifindex*. +++ * This helper is somewhat similar to **bpf_clone_redirect**\ +++ * (), except that the packet is not cloned, which provides +++ * increased performance. +++ * +++ * Except for XDP, both ingress and egress interfaces can be used +++ * for redirection. The **BPF_F_INGRESS** value in *flags* is used +++ * to make the distinction (ingress path is selected if the flag +++ * is present, egress path otherwise). Currently, XDP only +++ * supports redirection to the egress interface, and accepts no +++ * flag at all. +++ * +++ * The same effect can be attained with the more generic +++ * **bpf_redirect_map**\ (), which requires specific maps to be +++ * used but offers better performance. +++ * Return +++ * For XDP, the helper returns **XDP_REDIRECT** on success or +++ * **XDP_ABORTED** on error. For other program types, the values +++ * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on +++ * error. +++ * +++ * u32 bpf_get_route_realm(struct sk_buff *skb) +++ * Description +++ * Retrieve the realm or the route, that is to say the +++ * **tclassid** field of the destination for the *skb*. 
The +++ * indentifier retrieved is a user-provided tag, similar to the +++ * one used with the net_cls cgroup (see description for +++ * **bpf_get_cgroup_classid**\ () helper), but here this tag is +++ * held by a route (a destination entry), not by a task. +++ * +++ * Retrieving this identifier works with the clsact TC egress hook +++ * (see also **tc-bpf(8)**), or alternatively on conventional +++ * classful egress qdiscs, but not on TC ingress path. In case of +++ * clsact TC egress hook, this has the advantage that, internally, +++ * the destination entry has not been dropped yet in the transmit +++ * path. Therefore, the destination entry does not need to be +++ * artificially held via **netif_keep_dst**\ () for a classful +++ * qdisc until the *skb* is freed. +++ * +++ * This helper is available only if the kernel was compiled with +++ * **CONFIG_IP_ROUTE_CLASSID** configuration option. +++ * Return +++ * The realm of the route for the packet associated to *skb*, or 0 +++ * if none was found. +++ * +++ * int bpf_perf_event_output(struct pt_regs *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) +++ * Description +++ * Write raw *data* blob into a special BPF perf event held by +++ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf +++ * event must have the following attributes: **PERF_SAMPLE_RAW** +++ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and +++ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. +++ * +++ * The *flags* are used to indicate the index in *map* for which +++ * the value must be put, masked with **BPF_F_INDEX_MASK**. +++ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** +++ * to indicate that the index of the current CPU core should be +++ * used. +++ * +++ * The value to write, of *size*, is passed through eBPF stack and +++ * pointed by *data*. +++ * +++ * The context of the program *ctx* needs also be passed to the +++ * helper. +++ * +++ * On user space, a program willing to read the values needs to +++ * call **perf_event_open**\ () on the perf event (either for +++ * one or for all CPUs) and to store the file descriptor into the +++ * *map*. This must be done before the eBPF program can send data +++ * into it. An example is available in file +++ * *samples/bpf/trace_output_user.c* in the Linux kernel source +++ * tree (the eBPF program counterpart is in +++ * *samples/bpf/trace_output_kern.c*). +++ * +++ * **bpf_perf_event_output**\ () achieves better performance +++ * than **bpf_trace_printk**\ () for sharing data with user +++ * space, and is much better suitable for streaming data from eBPF +++ * programs. +++ * +++ * Note that this helper is not restricted to tracing use cases +++ * and can be used with programs attached to TC or XDP as well, +++ * where it allows for passing data to user space listeners. Data +++ * can be: +++ * +++ * * Only custom structs, +++ * * Only the packet payload, or +++ * * A combination of both. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len) +++ * Description +++ * This helper was provided as an easy way to load data from a +++ * packet. It can be used to load *len* bytes from *offset* from +++ * the packet associated to *skb*, into the buffer pointed by +++ * *to*. 
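On the user-space side of bpf_perf_event_output(), documented above, one perf event with the required attributes has to be opened per CPU and its fd stored into the BPF_MAP_TYPE_PERF_EVENT_ARRAY map before the program can emit; a hedged sketch using the sys_bpf() wrapper from earlier:

#include <linux/perf_event.h>

/* open a BPF_OUTPUT software event for one CPU and store its fd at map[cpu] */
static int setup_perf_output(int map_fd, int cpu)
{
    struct perf_event_attr pattr = {
        .type        = PERF_TYPE_SOFTWARE,
        .config      = PERF_COUNT_SW_BPF_OUTPUT,
        .sample_type = PERF_SAMPLE_RAW,
        .size        = sizeof(pattr),
    };
    union bpf_attr attr;
    int pfd;

    pfd = syscall(__NR_perf_event_open, &pattr, -1 /* pid */, cpu, -1, 0);
    if (pfd < 0)
        return pfd;

    memset(&attr, 0, sizeof(attr));
    attr.map_fd = map_fd;
    attr.key    = (__u64)(unsigned long)&cpu;
    attr.value  = (__u64)(unsigned long)&pfd;
    attr.flags  = BPF_ANY;

    return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}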
+++ * +++ * Since Linux 4.7, usage of this helper has mostly been replaced +++ * by "direct packet access", enabling packet data to be +++ * manipulated with *skb*\ **->data** and *skb*\ **->data_end** +++ * pointing respectively to the first byte of packet data and to +++ * the byte after the last byte of packet data. However, it +++ * remains useful if one wishes to read large quantities of data +++ * at once from a packet into the eBPF stack. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_get_stackid(struct pt_regs *ctx, struct bpf_map *map, u64 flags) +++ * Description +++ * Walk a user or a kernel stack and return its id. To achieve +++ * this, the helper needs *ctx*, which is a pointer to the context +++ * on which the tracing program is executed, and a pointer to a +++ * *map* of type **BPF_MAP_TYPE_STACK_TRACE**. +++ * +++ * The last argument, *flags*, holds the number of stack frames to +++ * skip (from 0 to 255), masked with +++ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set +++ * a combination of the following flags: +++ * +++ * **BPF_F_USER_STACK** +++ * Collect a user space stack instead of a kernel stack. +++ * **BPF_F_FAST_STACK_CMP** +++ * Compare stacks by hash only. +++ * **BPF_F_REUSE_STACKID** +++ * If two different stacks hash into the same *stackid*, +++ * discard the old one. +++ * +++ * The stack id retrieved is a 32 bit long integer handle which +++ * can be further combined with other data (including other stack +++ * ids) and used as a key into maps. This can be useful for +++ * generating a variety of graphs (such as flame graphs or off-cpu +++ * graphs). +++ * +++ * For walking a stack, this helper is an improvement over +++ * **bpf_probe_read**\ (), which can be used with unrolled loops +++ * but is not efficient and consumes a lot of eBPF instructions. +++ * Instead, **bpf_get_stackid**\ () can collect up to +++ * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that +++ * this limit can be controlled with the **sysctl** program, and +++ * that it should be manually increased in order to profile long +++ * user stacks (such as stacks for Java programs). To do so, use: +++ * +++ * :: +++ * +++ * # sysctl kernel.perf_event_max_stack= +++ * Return +++ * The positive or null stack id on success, or a negative error +++ * in case of failure. +++ * +++ * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed) +++ * Description +++ * Compute a checksum difference, from the raw buffer pointed by +++ * *from*, of length *from_size* (that must be a multiple of 4), +++ * towards the raw buffer pointed by *to*, of size *to_size* +++ * (same remark). An optional *seed* can be added to the value +++ * (this can be cascaded, the seed may come from a previous call +++ * to the helper). +++ * +++ * This is flexible enough to be used in several ways: +++ * +++ * * With *from_size* == 0, *to_size* > 0 and *seed* set to +++ * checksum, it can be used when pushing new data. +++ * * With *from_size* > 0, *to_size* == 0 and *seed* set to +++ * checksum, it can be used when removing data from a packet. +++ * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it +++ * can be used to compute a diff. Note that *from_size* and +++ * *to_size* do not need to be equal. +++ * +++ * This helper can be used in combination with +++ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to +++ * which one can feed in the difference computed with +++ * **bpf_csum_diff**\ (). 
+++ * Return +++ * The checksum result, or a negative error code in case of +++ * failure. +++ * +++ * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size) +++ * Description +++ * Retrieve tunnel options metadata for the packet associated to +++ * *skb*, and store the raw tunnel option data to the buffer *opt* +++ * of *size*. +++ * +++ * This helper can be used with encapsulation devices that can +++ * operate in "collect metadata" mode (please refer to the related +++ * note in the description of **bpf_skb_get_tunnel_key**\ () for +++ * more details). A particular example where this can be used is +++ * in combination with the Geneve encapsulation protocol, where it +++ * allows for pushing (with **bpf_skb_get_tunnel_opt**\ () helper) +++ * and retrieving arbitrary TLVs (Type-Length-Value headers) from +++ * the eBPF program. This allows for full customization of these +++ * headers. +++ * Return +++ * The size of the option data retrieved. +++ * +++ * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size) +++ * Description +++ * Set tunnel options metadata for the packet associated to *skb* +++ * to the option data contained in the raw buffer *opt* of *size*. +++ * +++ * See also the description of the **bpf_skb_get_tunnel_opt**\ () +++ * helper for additional information. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) +++ * Description +++ * Change the protocol of the *skb* to *proto*. Currently +++ * supported are transition from IPv4 to IPv6, and from IPv6 to +++ * IPv4. The helper takes care of the groundwork for the +++ * transition, including resizing the socket buffer. The eBPF +++ * program is expected to fill the new headers, if any, via +++ * **skb_store_bytes**\ () and to recompute the checksums with +++ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ +++ * (). The main case for this helper is to perform NAT64 +++ * operations out of an eBPF program. +++ * +++ * Internally, the GSO type is marked as dodgy so that headers are +++ * checked and segments are recalculated by the GSO/GRO engine. +++ * The size for GSO target is adapted as well. +++ * +++ * All values for *flags* are reserved for future usage, and must +++ * be left at zero. +++ * +++ * A call to this helper is susceptible to change the underlying +++ * packet buffer. Therefore, at load time, all checks on pointers +++ * previously done by the verifier are invalidated and must be +++ * performed again, if the helper is used in combination with +++ * direct packet access. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_skb_change_type(struct sk_buff *skb, u32 type) +++ * Description +++ * Change the packet type for the packet associated to *skb*. This +++ * comes down to setting *skb*\ **->pkt_type** to *type*, except +++ * the eBPF program does not have a write access to *skb*\ +++ * **->pkt_type** beside this helper. Using a helper here allows +++ * for graceful handling of errors. +++ * +++ * The major use case is to change incoming *skb*s to +++ * **PACKET_HOST** in a programmatic way instead of having to +++ * recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for +++ * example. +++ * +++ * Note that *type* only allows certain values. At this time, they +++ * are: +++ * +++ * **PACKET_HOST** +++ * Packet is for us. +++ * **PACKET_BROADCAST** +++ * Send packet to all. 
+++ * **PACKET_MULTICAST** +++ * Send packet to group. +++ * **PACKET_OTHERHOST** +++ * Send packet to someone else. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) +++ * Description +++ * Check whether *skb* is a descendant of the cgroup2 held by +++ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. +++ * Return +++ * The return value depends on the result of the test, and can be: +++ * +++ * * 0, if the *skb* failed the cgroup2 descendant test. +++ * * 1, if the *skb* succeeded the cgroup2 descendant test. +++ * * A negative error code, if an error occurred. +++ * +++ * u32 bpf_get_hash_recalc(struct sk_buff *skb) +++ * Description +++ * Retrieve the hash of the packet, *skb*\ **->hash**. If it is +++ * not set, in particular if the hash was cleared due to mangling, +++ * recompute this hash. Later accesses to the hash can be done +++ * directly with *skb*\ **->hash**. +++ * +++ * Calling **bpf_set_hash_invalid**\ (), changing a packet +++ * prototype with **bpf_skb_change_proto**\ (), or calling +++ * **bpf_skb_store_bytes**\ () with the +++ * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear +++ * the hash and to trigger a new computation for the next call to +++ * **bpf_get_hash_recalc**\ (). +++ * Return +++ * The 32-bit hash. +++ * +++ * u64 bpf_get_current_task(void) +++ * Return +++ * A pointer to the current task struct. +++ * +++ * int bpf_probe_write_user(void *dst, const void *src, u32 len) +++ * Description +++ * Attempt in a safe way to write *len* bytes from the buffer +++ * *src* to *dst* in memory. It only works for threads that are in +++ * user context, and *dst* must be a valid user space address. +++ * +++ * This helper should not be used to implement any kind of +++ * security mechanism because of TOC-TOU attacks, but rather to +++ * debug, divert, and manipulate execution of semi-cooperative +++ * processes. +++ * +++ * Keep in mind that this feature is meant for experiments, and it +++ * has a risk of crashing the system and running programs. +++ * Therefore, when an eBPF program using this helper is attached, +++ * a warning including PID and process name is printed to kernel +++ * logs. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) +++ * Description +++ * Check whether the probe is being run is the context of a given +++ * subset of the cgroup2 hierarchy. The cgroup2 to test is held by +++ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. +++ * Return +++ * The return value depends on the result of the test, and can be: +++ * +++ * * 0, if current task belongs to the cgroup2. +++ * * 1, if current task does not belong to the cgroup2. +++ * * A negative error code, if an error occurred. +++ * +++ * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) +++ * Description +++ * Resize (trim or grow) the packet associated to *skb* to the +++ * new *len*. The *flags* are reserved for future usage, and must +++ * be left at zero. +++ * +++ * The basic idea is that the helper performs the needed work to +++ * change the size of the packet, then the eBPF program rewrites +++ * the rest via helpers like **bpf_skb_store_bytes**\ (), +++ * **bpf_l3_csum_replace**\ (), **bpf_l3_csum_replace**\ () +++ * and others. This helper is a slow path utility intended for +++ * replies with control messages. 
And because it is targeted for +++ * slow path, the helper itself can afford to be slow: it +++ * implicitly linearizes, unclones and drops offloads from the +++ * *skb*. +++ * +++ * A call to this helper is susceptible to change the underlying +++ * packet buffer. Therefore, at load time, all checks on pointers +++ * previously done by the verifier are invalidated and must be +++ * performed again, if the helper is used in combination with +++ * direct packet access. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_skb_pull_data(struct sk_buff *skb, u32 len) +++ * Description +++ * Pull in non-linear data in case the *skb* is non-linear and not +++ * all of *len* are part of the linear section. Make *len* bytes +++ * from *skb* readable and writable. If a zero value is passed for +++ * *len*, then the whole length of the *skb* is pulled. +++ * +++ * This helper is only needed for reading and writing with direct +++ * packet access. +++ * +++ * For direct packet access, testing that offsets to access +++ * are within packet boundaries (test on *skb*\ **->data_end**) is +++ * susceptible to fail if offsets are invalid, or if the requested +++ * data is in non-linear parts of the *skb*. On failure the +++ * program can just bail out, or in the case of a non-linear +++ * buffer, use a helper to make the data available. The +++ * **bpf_skb_load_bytes**\ () helper is a first solution to access +++ * the data. Another one consists in using **bpf_skb_pull_data** +++ * to pull in once the non-linear parts, then retesting and +++ * eventually access the data. +++ * +++ * At the same time, this also makes sure the *skb* is uncloned, +++ * which is a necessary condition for direct write. As this needs +++ * to be an invariant for the write part only, the verifier +++ * detects writes and adds a prologue that is calling +++ * **bpf_skb_pull_data()** to effectively unclone the *skb* from +++ * the very beginning in case it is indeed cloned. +++ * +++ * A call to this helper is susceptible to change the underlying +++ * packet buffer. Therefore, at load time, all checks on pointers +++ * previously done by the verifier are invalidated and must be +++ * performed again, if the helper is used in combination with +++ * direct packet access. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum) +++ * Description +++ * Add the checksum *csum* into *skb*\ **->csum** in case the +++ * driver has supplied a checksum for the entire packet into that +++ * field. Return an error otherwise. This helper is intended to be +++ * used in combination with **bpf_csum_diff**\ (), in particular +++ * when the checksum needs to be updated after data has been +++ * written into the packet through direct packet access. +++ * Return +++ * The checksum on success, or a negative error code in case of +++ * failure. +++ * +++ * void bpf_set_hash_invalid(struct sk_buff *skb) +++ * Description +++ * Invalidate the current *skb*\ **->hash**. It can be used after +++ * mangling on headers through direct packet access, in order to +++ * indicate that the hash is outdated and to trigger a +++ * recalculation the next time the kernel tries to access this +++ * hash or when the **bpf_get_hash_recalc**\ () helper is called. +++ * +++ * int bpf_get_numa_node_id(void) +++ * Description +++ * Return the id of the current NUMA node. 
The primary use case +++ * for this helper is the selection of sockets for the local NUMA +++ * node, when the program is attached to sockets using the +++ * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**), +++ * but the helper is also available to other eBPF program types, +++ * similarly to **bpf_get_smp_processor_id**\ (). +++ * Return +++ * The id of current NUMA node. +++ * +++ * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) +++ * Description +++ * Grows headroom of packet associated to *skb* and adjusts the +++ * offset of the MAC header accordingly, adding *len* bytes of +++ * space. It automatically extends and reallocates memory as +++ * required. +++ * +++ * This helper can be used on a layer 3 *skb* to push a MAC header +++ * for redirection into a layer 2 device. +++ * +++ * All values for *flags* are reserved for future usage, and must +++ * be left at zero. +++ * +++ * A call to this helper is susceptible to change the underlying +++ * packet buffer. Therefore, at load time, all checks on pointers +++ * previously done by the verifier are invalidated and must be +++ * performed again, if the helper is used in combination with +++ * direct packet access. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) +++ * Description +++ * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that +++ * it is possible to use a negative value for *delta*. This helper +++ * can be used to prepare the packet for pushing or popping +++ * headers. +++ * +++ * A call to this helper is susceptible to change the underlying +++ * packet buffer. Therefore, at load time, all checks on pointers +++ * previously done by the verifier are invalidated and must be +++ * performed again, if the helper is used in combination with +++ * direct packet access. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr) +++ * Description +++ * Copy a NUL terminated string from an unsafe address +++ * *unsafe_ptr* to *dst*. The *size* should include the +++ * terminating NUL byte. In case the string length is smaller than +++ * *size*, the target is not padded with further NUL bytes. If the +++ * string length is larger than *size*, just *size*-1 bytes are +++ * copied and the last byte is set to NUL. +++ * +++ * On success, the length of the copied string is returned. This +++ * makes this helper useful in tracing programs for reading +++ * strings, and more importantly to get its length at runtime. See +++ * the following snippet: +++ * +++ * :: +++ * +++ * SEC("kprobe/sys_open") +++ * void bpf_sys_open(struct pt_regs *ctx) +++ * { +++ * char buf[PATHLEN]; // PATHLEN is defined to 256 +++ * int res = bpf_probe_read_str(buf, sizeof(buf), +++ * ctx->di); +++ * +++ * // Consume buf, for example push it to +++ * // userspace via bpf_perf_event_output(); we +++ * // can use res (the string length) as event +++ * // size, after checking its boundaries. +++ * } +++ * +++ * In comparison, using **bpf_probe_read()** helper here instead +++ * to read the string would require to estimate the length at +++ * compile time, and would often result in copying more memory +++ * than necessary. 
+++ * +++ * Another useful use case is when parsing individual process +++ * arguments or individual environment variables navigating +++ * *current*\ **->mm->arg_start** and *current*\ +++ * **->mm->env_start**: using this helper and the return value, +++ * one can quickly iterate at the right offset of the memory area. +++ * Return +++ * On success, the strictly positive length of the string, +++ * including the trailing NUL character. On error, a negative +++ * value. +++ * +++ * u64 bpf_get_socket_cookie(struct sk_buff *skb) +++ * Description +++ * If the **struct sk_buff** pointed by *skb* has a known socket, +++ * retrieve the cookie (generated by the kernel) of this socket. +++ * If no cookie has been set yet, generate a new cookie. Once +++ * generated, the socket cookie remains stable for the life of the +++ * socket. This helper can be useful for monitoring per socket +++ * networking traffic statistics as it provides a global socket +++ * identifier that can be assumed unique. +++ * Return +++ * A 8-byte long non-decreasing number on success, or 0 if the +++ * socket field is missing inside *skb*. +++ * +++ * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) +++ * Description +++ * Equivalent to bpf_get_socket_cookie() helper that accepts +++ * *skb*, but gets socket from **struct bpf_sock_addr** context. +++ * Return +++ * A 8-byte long non-decreasing number. +++ * +++ * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) +++ * Description +++ * Equivalent to bpf_get_socket_cookie() helper that accepts +++ * *skb*, but gets socket from **struct bpf_sock_ops** context. +++ * Return +++ * A 8-byte long non-decreasing number. +++ * +++ * u32 bpf_get_socket_uid(struct sk_buff *skb) +++ * Return +++ * The owner UID of the socket associated to *skb*. If the socket +++ * is **NULL**, or if it is not a full socket (i.e. if it is a +++ * time-wait or a request socket instead), **overflowuid** value +++ * is returned (note that **overflowuid** might also be the actual +++ * UID value for the socket). +++ * +++ * u32 bpf_set_hash(struct sk_buff *skb, u32 hash) +++ * Description +++ * Set the full hash for *skb* (set the field *skb*\ **->hash**) +++ * to value *hash*. +++ * Return +++ * 0 +++ * +++ * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen) +++ * Description +++ * Emulate a call to **setsockopt()** on the socket associated to +++ * *bpf_socket*, which must be a full socket. The *level* at +++ * which the option resides and the name *optname* of the option +++ * must be specified, see **setsockopt(2)** for more information. +++ * The option value of length *optlen* is pointed by *optval*. +++ * +++ * This helper actually implements a subset of **setsockopt()**. +++ * It supports the following *level*\ s: +++ * +++ * * **SOL_SOCKET**, which supports the following *optname*\ s: +++ * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, +++ * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**. +++ * * **IPPROTO_TCP**, which supports the following *optname*\ s: +++ * **TCP_CONGESTION**, **TCP_BPF_IW**, +++ * **TCP_BPF_SNDCWND_CLAMP**. +++ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. +++ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. +++ * Return +++ * 0 on success, or a negative error in case of failure. 
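As a usage illustration for the **bpf_setsockopt**\ () helper documented above (not part of the backported patch itself), the following is a minimal sketch of a **BPF_PROG_TYPE_SOCK_OPS** program that switches newly established TCP connections to a different congestion control algorithm. It assumes libbpf's bpf_helpers.h for SEC() and the helper declarations; the program name and the fallback TCP_CONGESTION definition are illustrative only.

#include <linux/bpf.h>
#include <linux/in.h>
#include <bpf/bpf_helpers.h>

#ifndef TCP_CONGESTION
#define TCP_CONGESTION 13 /* TCP socket option number, normally from linux/tcp.h */
#endif

SEC("sockops")
int set_cong(struct bpf_sock_ops *skops)
{
    /* "reno" is always built into the kernel, so the call cannot fail
     * because the algorithm is missing. */
    char cc[] = "reno";

    /* Act only once the connection is fully established. */
    if (skops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB ||
        skops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
        bpf_setsockopt(skops, IPPROTO_TCP, TCP_CONGESTION, cc, sizeof(cc));

    return 1;
}

char _license[] SEC("license") = "GPL";

The program would then be attached to a cgroup as a sock_ops program (e.g. via bpftool), which is outside the scope of this sketch.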
+++ * +++ * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) +++ * Description +++ * Grow or shrink the room for data in the packet associated to +++ * *skb* by *len_diff*, and according to the selected *mode*. +++ * +++ * There are two supported modes at this time: +++ * +++ * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer +++ * (room space is added or removed below the layer 2 header). +++ * +++ * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer +++ * (room space is added or removed below the layer 3 header). +++ * +++ * The following flags are supported at this time: +++ * +++ * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size. +++ * Adjusting mss in this way is not allowed for datagrams. +++ * +++ * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**, +++ * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**: +++ * Any new space is reserved to hold a tunnel header. +++ * Configure skb offsets and other fields accordingly. +++ * +++ * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**, +++ * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**: +++ * Use with ENCAP_L3 flags to further specify the tunnel type. +++ * +++ * * **BPF_F_ADJ_ROOM_ENCAP_L2**\ (*len*): +++ * Use with ENCAP_L3/L4 flags to further specify the tunnel +++ * type; *len* is the length of the inner MAC header. +++ * +++ * A call to this helper is susceptible to change the underlying +++ * packet buffer. Therefore, at load time, all checks on pointers +++ * previously done by the verifier are invalidated and must be +++ * performed again, if the helper is used in combination with +++ * direct packet access. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) +++ * Description +++ * Redirect the packet to the endpoint referenced by *map* at +++ * index *key*. Depending on its type, this *map* can contain +++ * references to net devices (for forwarding packets through other +++ * ports), or to CPUs (for redirecting XDP frames to another CPU; +++ * but this is only implemented for native XDP (with driver +++ * support) as of this writing). +++ * +++ * The lower two bits of *flags* are used as the return code if +++ * the map lookup fails. This is so that the return value can be +++ * one of the XDP program return codes up to XDP_TX, as chosen by +++ * the caller. Any higher bits in the *flags* argument must be +++ * unset. +++ * +++ * When used to redirect packets to net devices, this helper +++ * provides a high performance increase over **bpf_redirect**\ (). +++ * This is due to various implementation details of the underlying +++ * mechanisms, one of which is the fact that **bpf_redirect_map**\ +++ * () tries to send packet as a "bulk" to the device. +++ * Return +++ * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error. +++ * +++ * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags) +++ * Description +++ * Redirect the packet to the socket referenced by *map* (of type +++ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and +++ * egress interfaces can be used for redirection. The +++ * **BPF_F_INGRESS** value in *flags* is used to make the +++ * distinction (ingress path is selected if the flag is present, +++ * egress path otherwise). This is the only flag supported for now. +++ * Return +++ * **SK_PASS** on success, or **SK_DROP** on error. 
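To illustrate the **bpf_redirect_map**\ () helper documented above (again, not part of the backported patch), here is a minimal XDP sketch that forwards every frame to the interface stored in a **BPF_MAP_TYPE_DEVMAP**. It assumes libbpf's BTF-style map definitions and bpf_helpers.h; the map and function names are illustrative.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Device map holding the egress ifindex at key 0. */
struct {
    __uint(type, BPF_MAP_TYPE_DEVMAP);
    __uint(max_entries, 1);
    __type(key, __u32);
    __type(value, __u32);
} tx_port SEC(".maps");

SEC("xdp")
int xdp_fwd(struct xdp_md *ctx)
{
    /* Redirect every frame to the device stored at key 0. The lower two
     * bits of the flags argument (XDP_PASS here) are returned when the
     * map lookup fails, as described above. */
    return bpf_redirect_map(&tx_port, 0, XDP_PASS);
}

char _license[] SEC("license") = "GPL";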
+++ * +++ * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) +++ * Description +++ * Add an entry to, or update a *map* referencing sockets. The +++ * *skops* is used as a new value for the entry associated to +++ * *key*. *flags* is one of: +++ * +++ * **BPF_NOEXIST** +++ * The entry for *key* must not exist in the map. +++ * **BPF_EXIST** +++ * The entry for *key* must already exist in the map. +++ * **BPF_ANY** +++ * No condition on the existence of the entry for *key*. +++ * +++ * If the *map* has eBPF programs (parser and verdict), those will +++ * be inherited by the socket being added. If the socket is +++ * already attached to eBPF programs, this results in an error. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) +++ * Description +++ * Adjust the address pointed by *xdp_md*\ **->data_meta** by +++ * *delta* (which can be positive or negative). Note that this +++ * operation modifies the address stored in *xdp_md*\ **->data**, +++ * so the latter must be loaded only after the helper has been +++ * called. +++ * +++ * The use of *xdp_md*\ **->data_meta** is optional and programs +++ * are not required to use it. The rationale is that when the +++ * packet is processed with XDP (e.g. as DoS filter), it is +++ * possible to push further meta data along with it before passing +++ * to the stack, and to give the guarantee that an ingress eBPF +++ * program attached as a TC classifier on the same device can pick +++ * this up for further post-processing. Since TC works with socket +++ * buffers, it remains possible to set from XDP the **mark** or +++ * **priority** pointers, or other pointers for the socket buffer. +++ * Having this scratch space generic and programmable allows for +++ * more flexibility as the user is free to store whatever meta +++ * data they need. +++ * +++ * A call to this helper is susceptible to change the underlying +++ * packet buffer. Therefore, at load time, all checks on pointers +++ * previously done by the verifier are invalidated and must be +++ * performed again, if the helper is used in combination with +++ * direct packet access. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) +++ * Description +++ * Read the value of a perf event counter, and store it into *buf* +++ * of size *buf_size*. This helper relies on a *map* of type +++ * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event +++ * counter is selected when *map* is updated with perf event file +++ * descriptors. The *map* is an array whose size is the number of +++ * available CPUs, and each cell contains a value relative to one +++ * CPU. The value to retrieve is indicated by *flags*, that +++ * contains the index of the CPU to look up, masked with +++ * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to +++ * **BPF_F_CURRENT_CPU** to indicate that the value for the +++ * current CPU should be retrieved. +++ * +++ * This helper behaves in a way close to +++ * **bpf_perf_event_read**\ () helper, save that instead of +++ * just returning the value observed, it fills the *buf* +++ * structure. This allows for additional data to be retrieved: in +++ * particular, the enabled and running times (in *buf*\ +++ * **->enabled** and *buf*\ **->running**, respectively) are +++ * copied. 
In general, **bpf_perf_event_read_value**\ () is
+++ * recommended over **bpf_perf_event_read**\ (), which has some
+++ * ABI issues and provides fewer functionalities.
+++ *
+++ * These values are interesting, because hardware PMU (Performance
+++ * Monitoring Unit) counters are limited resources. When there are
+++ * more PMU based perf events opened than available counters,
+++ * kernel will multiplex these events so each event gets certain
+++ * percentage (but not all) of the PMU time. In case that
+++ * multiplexing happens, the number of samples or counter value
+++ * will not reflect the case compared to when no multiplexing
+++ * occurs. This makes comparison between different runs difficult.
+++ * Typically, the counter value should be normalized before
+++ * comparing to other experiments. The usual normalization is done
+++ * as follows.
+++ *
+++ * ::
+++ *
+++ * normalized_counter = counter * t_enabled / t_running
+++ *
+++ * Where t_enabled is the time enabled for event and t_running is
+++ * the time running for event since last normalization. The
+++ * enabled and running times are accumulated since the perf event
+++ * open. To achieve scaling factor between two invocations of an
+++ * eBPF program, users can use CPU id as the key (which is
+++ * typical for perf array usage model) to remember the previous
+++ * value and do the calculation inside the eBPF program.
+++ * Return
+++ * 0 on success, or a negative error in case of failure.
+++ *
+++ * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+++ * Description
+++ * For an eBPF program attached to a perf event, retrieve the
+++ * value of the event counter associated to *ctx* and store it in
+++ * the structure pointed by *buf* and of size *buf_size*. Enabled
+++ * and running times are also stored in the structure (see
+++ * description of helper **bpf_perf_event_read_value**\ () for
+++ * more details).
+++ * Return
+++ * 0 on success, or a negative error in case of failure.
+++ *
+++ * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
+++ * Description
+++ * Emulate a call to **getsockopt()** on the socket associated to
+++ * *bpf_socket*, which must be a full socket. The *level* at
+++ * which the option resides and the name *optname* of the option
+++ * must be specified, see **getsockopt(2)** for more information.
+++ * The retrieved value is stored in the structure pointed by
+++ * *optval* and of length *optlen*.
+++ *
+++ * This helper actually implements a subset of **getsockopt()**.
+++ * It supports the following *level*\ s:
+++ *
+++ * * **IPPROTO_TCP**, which supports *optname*
+++ * **TCP_CONGESTION**.
+++ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+++ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+++ * Return
+++ * 0 on success, or a negative error in case of failure.
+++ *
+++ * int bpf_override_return(struct pt_regs *regs, u64 rc)
+++ * Description
+++ * Used for error injection, this helper uses kprobes to override
+++ * the return value of the probed function, and to set it to *rc*.
+++ * The first argument is the context *regs* on which the kprobe
+++ * works.
+++ *
+++ * This helper works by setting the PC (program counter)
+++ * to an override function which is run in place of the original
+++ * probed function. This means the probed function is not run at
+++ * all. The replacement function just returns with the required
+++ * value.
+++ * +++ * This helper has security implications, and thus is subject to +++ * restrictions. It is only available if the kernel was compiled +++ * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration +++ * option, and in this case it only works on functions tagged with +++ * **ALLOW_ERROR_INJECTION** in the kernel code. +++ * +++ * Also, the helper is only available for the architectures having +++ * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing, +++ * x86 architecture is the only one to support this feature. +++ * Return +++ * 0 +++ * +++ * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) +++ * Description +++ * Attempt to set the value of the **bpf_sock_ops_cb_flags** field +++ * for the full TCP socket associated to *bpf_sock_ops* to +++ * *argval*. +++ * +++ * The primary use of this field is to determine if there should +++ * be calls to eBPF programs of type +++ * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP +++ * code. A program of the same type can change its value, per +++ * connection and as necessary, when the connection is +++ * established. This field is directly accessible for reading, but +++ * this helper must be used for updates in order to return an +++ * error if an eBPF program tries to set a callback that is not +++ * supported in the current kernel. +++ * +++ * *argval* is a flag array which can combine these flags: +++ * +++ * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) +++ * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) +++ * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) +++ * * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT) +++ * +++ * Therefore, this function can be used to clear a callback flag by +++ * setting the appropriate bit to zero. e.g. to disable the RTO +++ * callback: +++ * +++ * **bpf_sock_ops_cb_flags_set(bpf_sock,** +++ * **bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)** +++ * +++ * Here are some examples of where one could call such eBPF +++ * program: +++ * +++ * * When RTO fires. +++ * * When a packet is retransmitted. +++ * * When the connection terminates. +++ * * When a packet is sent. +++ * * When a packet is received. +++ * Return +++ * Code **-EINVAL** if the socket is not a full TCP socket; +++ * otherwise, a positive number containing the bits that could not +++ * be set is returned (which comes down to 0 if all bits were set +++ * as required). +++ * +++ * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) +++ * Description +++ * This helper is used in programs implementing policies at the +++ * socket level. If the message *msg* is allowed to pass (i.e. if +++ * the verdict eBPF program returns **SK_PASS**), redirect it to +++ * the socket referenced by *map* (of type +++ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and +++ * egress interfaces can be used for redirection. The +++ * **BPF_F_INGRESS** value in *flags* is used to make the +++ * distinction (ingress path is selected if the flag is present, +++ * egress path otherwise). This is the only flag supported for now. +++ * Return +++ * **SK_PASS** on success, or **SK_DROP** on error. +++ * +++ * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) +++ * Description +++ * For socket policies, apply the verdict of the eBPF program to +++ * the next *bytes* (number of bytes) of message *msg*. 
+++ * +++ * For example, this helper can be used in the following cases: +++ * +++ * * A single **sendmsg**\ () or **sendfile**\ () system call +++ * contains multiple logical messages that the eBPF program is +++ * supposed to read and for which it should apply a verdict. +++ * * An eBPF program only cares to read the first *bytes* of a +++ * *msg*. If the message has a large payload, then setting up +++ * and calling the eBPF program repeatedly for all bytes, even +++ * though the verdict is already known, would create unnecessary +++ * overhead. +++ * +++ * When called from within an eBPF program, the helper sets a +++ * counter internal to the BPF infrastructure, that is used to +++ * apply the last verdict to the next *bytes*. If *bytes* is +++ * smaller than the current data being processed from a +++ * **sendmsg**\ () or **sendfile**\ () system call, the first +++ * *bytes* will be sent and the eBPF program will be re-run with +++ * the pointer for start of data pointing to byte number *bytes* +++ * **+ 1**. If *bytes* is larger than the current data being +++ * processed, then the eBPF verdict will be applied to multiple +++ * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are +++ * consumed. +++ * +++ * Note that if a socket closes with the internal counter holding +++ * a non-zero value, this is not a problem because data is not +++ * being buffered for *bytes* and is sent as it is received. +++ * Return +++ * 0 +++ * +++ * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) +++ * Description +++ * For socket policies, prevent the execution of the verdict eBPF +++ * program for message *msg* until *bytes* (byte number) have been +++ * accumulated. +++ * +++ * This can be used when one needs a specific number of bytes +++ * before a verdict can be assigned, even if the data spans +++ * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme +++ * case would be a user calling **sendmsg**\ () repeatedly with +++ * 1-byte long message segments. Obviously, this is bad for +++ * performance, but it is still valid. If the eBPF program needs +++ * *bytes* bytes to validate a header, this helper can be used to +++ * prevent the eBPF program to be called again until *bytes* have +++ * been accumulated. +++ * Return +++ * 0 +++ * +++ * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) +++ * Description +++ * For socket policies, pull in non-linear data from user space +++ * for *msg* and set pointers *msg*\ **->data** and *msg*\ +++ * **->data_end** to *start* and *end* bytes offsets into *msg*, +++ * respectively. +++ * +++ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a +++ * *msg* it can only parse data that the (**data**, **data_end**) +++ * pointers have already consumed. For **sendmsg**\ () hooks this +++ * is likely the first scatterlist element. But for calls relying +++ * on the **sendpage** handler (e.g. **sendfile**\ ()) this will +++ * be the range (**0**, **0**) because the data is shared with +++ * user space and by default the objective is to avoid allowing +++ * user space to modify data while (or after) eBPF verdict is +++ * being decided. This helper can be used to pull in data and to +++ * set the start and end pointer to given values. Data will be +++ * copied if necessary (i.e. if data was not linear and if start +++ * and end pointers do not point to the same chunk). +++ * +++ * A call to this helper is susceptible to change the underlying +++ * packet buffer. 
Therefore, at load time, all checks on pointers +++ * previously done by the verifier are invalidated and must be +++ * performed again, if the helper is used in combination with +++ * direct packet access. +++ * +++ * All values for *flags* are reserved for future usage, and must +++ * be left at zero. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) +++ * Description +++ * Bind the socket associated to *ctx* to the address pointed by +++ * *addr*, of length *addr_len*. This allows for making outgoing +++ * connection from the desired IP address, which can be useful for +++ * example when all processes inside a cgroup should use one +++ * single IP address on a host that has multiple IP configured. +++ * +++ * This helper works for IPv4 and IPv6, TCP and UDP sockets. The +++ * domain (*addr*\ **->sa_family**) must be **AF_INET** (or +++ * **AF_INET6**). Looking for a free port to bind to can be +++ * expensive, therefore binding to port is not permitted by the +++ * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively) +++ * must be set to zero. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) +++ * Description +++ * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is +++ * only possible to shrink the packet as of this writing, +++ * therefore *delta* must be a negative integer. +++ * +++ * A call to this helper is susceptible to change the underlying +++ * packet buffer. Therefore, at load time, all checks on pointers +++ * previously done by the verifier are invalidated and must be +++ * performed again, if the helper is used in combination with +++ * direct packet access. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) +++ * Description +++ * Retrieve the XFRM state (IP transform framework, see also +++ * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*. +++ * +++ * The retrieved value is stored in the **struct bpf_xfrm_state** +++ * pointed by *xfrm_state* and of length *size*. +++ * +++ * All values for *flags* are reserved for future usage, and must +++ * be left at zero. +++ * +++ * This helper is available only if the kernel was compiled with +++ * **CONFIG_XFRM** configuration option. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags) +++ * Description +++ * Return a user or a kernel stack in bpf program provided buffer. +++ * To achieve this, the helper needs *ctx*, which is a pointer +++ * to the context on which the tracing program is executed. +++ * To store the stacktrace, the bpf program provides *buf* with +++ * a nonnegative *size*. +++ * +++ * The last argument, *flags*, holds the number of stack frames to +++ * skip (from 0 to 255), masked with +++ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set +++ * the following flags: +++ * +++ * **BPF_F_USER_STACK** +++ * Collect a user space stack instead of a kernel stack. +++ * **BPF_F_USER_BUILD_ID** +++ * Collect buildid+offset instead of ips for user stack, +++ * only valid if **BPF_F_USER_STACK** is also specified. 
+++ * +++ * **bpf_get_stack**\ () can collect up to +++ * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject +++ * to sufficient large buffer size. Note that +++ * this limit can be controlled with the **sysctl** program, and +++ * that it should be manually increased in order to profile long +++ * user stacks (such as stacks for Java programs). To do so, use: +++ * +++ * :: +++ * +++ * # sysctl kernel.perf_event_max_stack= +++ * Return +++ * A non-negative value equal to or less than *size* on success, +++ * or a negative error in case of failure. +++ * +++ * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header) +++ * Description +++ * This helper is similar to **bpf_skb_load_bytes**\ () in that +++ * it provides an easy way to load *len* bytes from *offset* +++ * from the packet associated to *skb*, into the buffer pointed +++ * by *to*. The difference to **bpf_skb_load_bytes**\ () is that +++ * a fifth argument *start_header* exists in order to select a +++ * base offset to start from. *start_header* can be one of: +++ * +++ * **BPF_HDR_START_MAC** +++ * Base offset to load data from is *skb*'s mac header. +++ * **BPF_HDR_START_NET** +++ * Base offset to load data from is *skb*'s network header. +++ * +++ * In general, "direct packet access" is the preferred method to +++ * access packet data, however, this helper is in particular useful +++ * in socket filters where *skb*\ **->data** does not always point +++ * to the start of the mac header and where "direct packet access" +++ * is not available. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) +++ * Description +++ * Do FIB lookup in kernel tables using parameters in *params*. +++ * If lookup is successful and result shows packet is to be +++ * forwarded, the neighbor tables are searched for the nexthop. +++ * If successful (ie., FIB lookup shows forwarding and nexthop +++ * is resolved), the nexthop address is returned in ipv4_dst +++ * or ipv6_dst based on family, smac is set to mac address of +++ * egress device, dmac is set to nexthop mac address, rt_metric +++ * is set to metric from route (IPv4/IPv6 only), and ifindex +++ * is set to the device index of the nexthop from the FIB lookup. +++ * +++ * *plen* argument is the size of the passed in struct. +++ * *flags* argument can be a combination of one or more of the +++ * following values: +++ * +++ * **BPF_FIB_LOOKUP_DIRECT** +++ * Do a direct table lookup vs full lookup using FIB +++ * rules. +++ * **BPF_FIB_LOOKUP_OUTPUT** +++ * Perform lookup from an egress perspective (default is +++ * ingress). +++ * +++ * *ctx* is either **struct xdp_md** for XDP programs or +++ * **struct sk_buff** tc cls_act programs. +++ * Return +++ * * < 0 if any input argument is invalid +++ * * 0 on success (packet is forwarded, nexthop neighbor exists) +++ * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the +++ * packet is not forwarded or needs assist from full stack +++ * +++ * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) +++ * Description +++ * Add an entry to, or update a sockhash *map* referencing sockets. +++ * The *skops* is used as a new value for the entry associated to +++ * *key*. *flags* is one of: +++ * +++ * **BPF_NOEXIST** +++ * The entry for *key* must not exist in the map. 
+++ * **BPF_EXIST** +++ * The entry for *key* must already exist in the map. +++ * **BPF_ANY** +++ * No condition on the existence of the entry for *key*. +++ * +++ * If the *map* has eBPF programs (parser and verdict), those will +++ * be inherited by the socket being added. If the socket is +++ * already attached to eBPF programs, this results in an error. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) +++ * Description +++ * This helper is used in programs implementing policies at the +++ * socket level. If the message *msg* is allowed to pass (i.e. if +++ * the verdict eBPF program returns **SK_PASS**), redirect it to +++ * the socket referenced by *map* (of type +++ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and +++ * egress interfaces can be used for redirection. The +++ * **BPF_F_INGRESS** value in *flags* is used to make the +++ * distinction (ingress path is selected if the flag is present, +++ * egress path otherwise). This is the only flag supported for now. +++ * Return +++ * **SK_PASS** on success, or **SK_DROP** on error. +++ * +++ * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) +++ * Description +++ * This helper is used in programs implementing policies at the +++ * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. +++ * if the verdeict eBPF program returns **SK_PASS**), redirect it +++ * to the socket referenced by *map* (of type +++ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and +++ * egress interfaces can be used for redirection. The +++ * **BPF_F_INGRESS** value in *flags* is used to make the +++ * distinction (ingress path is selected if the flag is present, +++ * egress otherwise). This is the only flag supported for now. +++ * Return +++ * **SK_PASS** on success, or **SK_DROP** on error. +++ * +++ * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) +++ * Description +++ * Encapsulate the packet associated to *skb* within a Layer 3 +++ * protocol header. This header is provided in the buffer at +++ * address *hdr*, with *len* its size in bytes. *type* indicates +++ * the protocol of the header and can be one of: +++ * +++ * **BPF_LWT_ENCAP_SEG6** +++ * IPv6 encapsulation with Segment Routing Header +++ * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH, +++ * the IPv6 header is computed by the kernel. +++ * **BPF_LWT_ENCAP_SEG6_INLINE** +++ * Only works if *skb* contains an IPv6 packet. Insert a +++ * Segment Routing Header (**struct ipv6_sr_hdr**) inside +++ * the IPv6 header. +++ * **BPF_LWT_ENCAP_IP** +++ * IP encapsulation (GRE/GUE/IPIP/etc). The outer header +++ * must be IPv4 or IPv6, followed by zero or more +++ * additional headers, up to **LWT_BPF_MAX_HEADROOM** +++ * total bytes in all prepended headers. Please note that +++ * if **skb_is_gso**\ (*skb*) is true, no more than two +++ * headers can be prepended, and the inner header, if +++ * present, should be either GRE or UDP/GUE. +++ * +++ * **BPF_LWT_ENCAP_SEG6**\ \* types can be called by BPF programs +++ * of type **BPF_PROG_TYPE_LWT_IN**; **BPF_LWT_ENCAP_IP** type can +++ * be called by bpf programs of types **BPF_PROG_TYPE_LWT_IN** and +++ * **BPF_PROG_TYPE_LWT_XMIT**. +++ * +++ * A call to this helper is susceptible to change the underlying +++ * packet buffer. 
Therefore, at load time, all checks on pointers +++ * previously done by the verifier are invalidated and must be +++ * performed again, if the helper is used in combination with +++ * direct packet access. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) +++ * Description +++ * Store *len* bytes from address *from* into the packet +++ * associated to *skb*, at *offset*. Only the flags, tag and TLVs +++ * inside the outermost IPv6 Segment Routing Header can be +++ * modified through this helper. +++ * +++ * A call to this helper is susceptible to change the underlying +++ * packet buffer. Therefore, at load time, all checks on pointers +++ * previously done by the verifier are invalidated and must be +++ * performed again, if the helper is used in combination with +++ * direct packet access. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) +++ * Description +++ * Adjust the size allocated to TLVs in the outermost IPv6 +++ * Segment Routing Header contained in the packet associated to +++ * *skb*, at position *offset* by *delta* bytes. Only offsets +++ * after the segments are accepted. *delta* can be as well +++ * positive (growing) as negative (shrinking). +++ * +++ * A call to this helper is susceptible to change the underlying +++ * packet buffer. Therefore, at load time, all checks on pointers +++ * previously done by the verifier are invalidated and must be +++ * performed again, if the helper is used in combination with +++ * direct packet access. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) +++ * Description +++ * Apply an IPv6 Segment Routing action of type *action* to the +++ * packet associated to *skb*. Each action takes a parameter +++ * contained at address *param*, and of length *param_len* bytes. +++ * *action* can be one of: +++ * +++ * **SEG6_LOCAL_ACTION_END_X** +++ * End.X action: Endpoint with Layer-3 cross-connect. +++ * Type of *param*: **struct in6_addr**. +++ * **SEG6_LOCAL_ACTION_END_T** +++ * End.T action: Endpoint with specific IPv6 table lookup. +++ * Type of *param*: **int**. +++ * **SEG6_LOCAL_ACTION_END_B6** +++ * End.B6 action: Endpoint bound to an SRv6 policy. +++ * Type of *param*: **struct ipv6_sr_hdr**. +++ * **SEG6_LOCAL_ACTION_END_B6_ENCAP** +++ * End.B6.Encap action: Endpoint bound to an SRv6 +++ * encapsulation policy. +++ * Type of *param*: **struct ipv6_sr_hdr**. +++ * +++ * A call to this helper is susceptible to change the underlying +++ * packet buffer. Therefore, at load time, all checks on pointers +++ * previously done by the verifier are invalidated and must be +++ * performed again, if the helper is used in combination with +++ * direct packet access. +++ * Return +++ * 0 on success, or a negative error in case of failure. +++ * +++ * int bpf_rc_repeat(void *ctx) +++ * Description +++ * This helper is used in programs implementing IR decoding, to +++ * report a successfully decoded repeat key message. This delays +++ * the generation of a key up event for previously generated +++ * key down event. +++ * +++ * Some IR protocols like NEC have a special IR message for +++ * repeating last button, for when a button is held down. 
+++ * +++ * The *ctx* should point to the lirc sample as passed into +++ * the program. +++ * +++ * This helper is only available is the kernel was compiled with +++ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to +++ * "**y**". +++ * Return +++ * 0 +++ * +++ * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) +++ * Description +++ * This helper is used in programs implementing IR decoding, to +++ * report a successfully decoded key press with *scancode*, +++ * *toggle* value in the given *protocol*. The scancode will be +++ * translated to a keycode using the rc keymap, and reported as +++ * an input key down event. After a period a key up event is +++ * generated. This period can be extended by calling either +++ * **bpf_rc_keydown**\ () again with the same values, or calling +++ * **bpf_rc_repeat**\ (). +++ * +++ * Some protocols include a toggle bit, in case the button was +++ * released and pressed again between consecutive scancodes. +++ * +++ * The *ctx* should point to the lirc sample as passed into +++ * the program. +++ * +++ * The *protocol* is the decoded protocol number (see +++ * **enum rc_proto** for some predefined values). +++ * +++ * This helper is only available is the kernel was compiled with +++ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to +++ * "**y**". +++ * Return +++ * 0 +++ * +++ * u64 bpf_skb_cgroup_id(struct sk_buff *skb) +++ * Description +++ * Return the cgroup v2 id of the socket associated with the *skb*. +++ * This is roughly similar to the **bpf_get_cgroup_classid**\ () +++ * helper for cgroup v1 by providing a tag resp. identifier that +++ * can be matched on or used for map lookups e.g. to implement +++ * policy. The cgroup v2 id of a given path in the hierarchy is +++ * exposed in user space through the f_handle API in order to get +++ * to the same 64-bit id. +++ * +++ * This helper can be used on TC egress path, but not on ingress, +++ * and is available only if the kernel was compiled with the +++ * **CONFIG_SOCK_CGROUP_DATA** configuration option. +++ * Return +++ * The id is returned or 0 in case the id could not be retrieved. +++ * +++ * u64 bpf_get_current_cgroup_id(void) +++ * Return +++ * A 64-bit integer containing the current cgroup id based +++ * on the cgroup within which the current task is running. +++ * +++ * void *bpf_get_local_storage(void *map, u64 flags) +++ * Description +++ * Get the pointer to the local storage area. +++ * The type and the size of the local storage is defined +++ * by the *map* argument. +++ * The *flags* meaning is specific for each map type, +++ * and has to be 0 for cgroup local storage. +++ * +++ * Depending on the BPF program type, a local storage area +++ * can be shared between multiple instances of the BPF program, +++ * running simultaneously. +++ * +++ * A user should care about the synchronization by himself. +++ * For example, by using the **BPF_STX_XADD** instruction to alter +++ * the shared data. +++ * Return +++ * A pointer to the local storage area. +++ * +++ * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) +++ * Description +++ * Select a **SO_REUSEPORT** socket from a +++ * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. +++ * It checks the selected socket is matching the incoming +++ * request in the socket buffer. +++ * Return +++ * 0 on success, or a negative error in case of failure. 
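As an illustration of the **bpf_get_local_storage**\ () helper documented above (not part of the backported patch), the sketch below keeps a per-cgroup packet counter in cgroup local storage from a cgroup skb program. It assumes libbpf's BTF-style map definitions and bpf_helpers.h; the map and function names are illustrative.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Per-cgroup packet counter kept in cgroup local storage. */
struct {
    __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
    __type(key, struct bpf_cgroup_storage_key);
    __type(value, __u64);
} pkt_count SEC(".maps");

SEC("cgroup_skb/egress")
int count_egress(struct __sk_buff *skb)
{
    __u64 *counter;

    /* flags must be 0 for cgroup local storage. */
    counter = bpf_get_local_storage(&pkt_count, 0);

    /* Atomic add (BPF_STX_XADD), since several program instances may
     * run concurrently, as the text above points out. */
    __sync_fetch_and_add(counter, 1);

    return 1; /* allow the packet */
}

char _license[] SEC("license") = "GPL";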
+++ * +++ * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level) +++ * Description +++ * Return id of cgroup v2 that is ancestor of cgroup associated +++ * with the *skb* at the *ancestor_level*. The root cgroup is at +++ * *ancestor_level* zero and each step down the hierarchy +++ * increments the level. If *ancestor_level* == level of cgroup +++ * associated with *skb*, then return value will be same as that +++ * of **bpf_skb_cgroup_id**\ (). +++ * +++ * The helper is useful to implement policies based on cgroups +++ * that are upper in hierarchy than immediate cgroup associated +++ * with *skb*. +++ * +++ * The format of returned id and helper limitations are same as in +++ * **bpf_skb_cgroup_id**\ (). +++ * Return +++ * The id is returned or 0 in case the id could not be retrieved. +++ * +++ * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) +++ * Description +++ * Look for TCP socket matching *tuple*, optionally in a child +++ * network namespace *netns*. The return value must be checked, +++ * and if non-**NULL**, released via **bpf_sk_release**\ (). +++ * +++ * The *ctx* should point to the context of the program, such as +++ * the skb or socket (depending on the hook in use). This is used +++ * to determine the base network namespace for the lookup. +++ * +++ * *tuple_size* must be one of: +++ * +++ * **sizeof**\ (*tuple*\ **->ipv4**) +++ * Look for an IPv4 socket. +++ * **sizeof**\ (*tuple*\ **->ipv6**) +++ * Look for an IPv6 socket. +++ * +++ * If the *netns* is a negative signed 32-bit integer, then the +++ * socket lookup table in the netns associated with the *ctx* will +++ * will be used. For the TC hooks, this is the netns of the device +++ * in the skb. For socket hooks, this is the netns of the socket. +++ * If *netns* is any other signed 32-bit value greater than or +++ * equal to zero then it specifies the ID of the netns relative to +++ * the netns associated with the *ctx*. *netns* values beyond the +++ * range of 32-bit integers are reserved for future use. +++ * +++ * All values for *flags* are reserved for future usage, and must +++ * be left at zero. +++ * +++ * This helper is available only if the kernel was compiled with +++ * **CONFIG_NET** configuration option. +++ * Return +++ * Pointer to **struct bpf_sock**, or **NULL** in case of failure. +++ * For sockets with reuseport option, the **struct bpf_sock** +++ * result is from *reuse*\ **->socks**\ [] using the hash of the +++ * tuple. +++ * +++ * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) +++ * Description +++ * Look for UDP socket matching *tuple*, optionally in a child +++ * network namespace *netns*. The return value must be checked, +++ * and if non-**NULL**, released via **bpf_sk_release**\ (). +++ * +++ * The *ctx* should point to the context of the program, such as +++ * the skb or socket (depending on the hook in use). This is used +++ * to determine the base network namespace for the lookup. +++ * +++ * *tuple_size* must be one of: +++ * +++ * **sizeof**\ (*tuple*\ **->ipv4**) +++ * Look for an IPv4 socket. +++ * **sizeof**\ (*tuple*\ **->ipv6**) +++ * Look for an IPv6 socket. +++ * +++ * If the *netns* is a negative signed 32-bit integer, then the +++ * socket lookup table in the netns associated with the *ctx* will +++ * will be used. For the TC hooks, this is the netns of the device +++ * in the skb. 
For socket hooks, this is the netns of the socket.
+++ * If *netns* is any other signed 32-bit value greater than or
+++ * equal to zero then it specifies the ID of the netns relative to
+++ * the netns associated with the *ctx*. *netns* values beyond the
+++ * range of 32-bit integers are reserved for future use.
+++ *
+++ * All values for *flags* are reserved for future usage, and must
+++ * be left at zero.
+++ *
+++ * This helper is available only if the kernel was compiled with
+++ * **CONFIG_NET** configuration option.
+++ * Return
+++ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+++ * For sockets with reuseport option, the **struct bpf_sock**
+++ * result is from *reuse*\ **->socks**\ [] using the hash of the
+++ * tuple.
+++ *
+++ * int bpf_sk_release(struct bpf_sock *sock)
+++ * Description
+++ * Release the reference held by *sock*. *sock* must be a
+++ * non-**NULL** pointer that was returned from
+++ * **bpf_sk_lookup_xxx**\ ().
+++ * Return
+++ * 0 on success, or a negative error in case of failure.
+++ *
+++ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+++ * Description
+++ * Push an element *value* in *map*. *flags* is one of:
+++ *
+++ * **BPF_EXIST**
+++ * If the queue/stack is full, the oldest element is
+++ * removed to make room for this.
+++ * Return
+++ * 0 on success, or a negative error in case of failure.
+++ *
+++ * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+++ * Description
+++ * Pop an element from *map*.
+++ * Return
+++ * 0 on success, or a negative error in case of failure.
+++ *
+++ * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+++ * Description
+++ * Get an element from *map* without removing it.
+++ * Return
+++ * 0 on success, or a negative error in case of failure.
+++ *
+++ * int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
+++ * Description
+++ * For socket policies, insert *len* bytes into *msg* at offset
+++ * *start*.
+++ *
+++ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+++ * *msg* it may want to insert metadata or options into the *msg*.
+++ * This can later be read and used by any of the lower layer BPF
+++ * hooks.
+++ *
+++ * This helper may fail if under memory pressure (a malloc
+++ * fails); in these cases BPF programs will get an appropriate
+++ * error and will need to handle it.
+++ * Return
+++ * 0 on success, or a negative error in case of failure.
+++ *
+++ * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags)
+++ * Description
+++ * Will remove *pop* bytes from a *msg* starting at byte *start*.
+++ * This may result in **ENOMEM** errors under certain situations if
+++ * an allocation and copy are required due to a full ring buffer.
+++ * However, the helper will try to avoid doing the allocation
+++ * if possible. Other errors can occur if input parameters are
+++ * invalid, either due to the *start* byte not being a valid part of *msg*
+++ * payload and/or the *pop* value being too large.
+++ * Return
+++ * 0 on success, or a negative error in case of failure.
+++ *
+++ * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y)
+++ * Description
+++ * This helper is used in programs implementing IR decoding, to
+++ * report a successfully decoded pointer movement.
+++ *
+++ * The *ctx* should point to the lirc sample as passed into
+++ * the program.
+++ * +++ * This helper is only available is the kernel was compiled with +++ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to +++ * "**y**". +++ * Return +++ * 0 +++ * +++ * int bpf_spin_lock(struct bpf_spin_lock *lock) +++ * Description +++ * Acquire a spinlock represented by the pointer *lock*, which is +++ * stored as part of a value of a map. Taking the lock allows to +++ * safely update the rest of the fields in that value. The +++ * spinlock can (and must) later be released with a call to +++ * **bpf_spin_unlock**\ (\ *lock*\ ). +++ * +++ * Spinlocks in BPF programs come with a number of restrictions +++ * and constraints: +++ * +++ * * **bpf_spin_lock** objects are only allowed inside maps of +++ * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this +++ * list could be extended in the future). +++ * * BTF description of the map is mandatory. +++ * * The BPF program can take ONE lock at a time, since taking two +++ * or more could cause dead locks. +++ * * Only one **struct bpf_spin_lock** is allowed per map element. +++ * * When the lock is taken, calls (either BPF to BPF or helpers) +++ * are not allowed. +++ * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not +++ * allowed inside a spinlock-ed region. +++ * * The BPF program MUST call **bpf_spin_unlock**\ () to release +++ * the lock, on all execution paths, before it returns. +++ * * The BPF program can access **struct bpf_spin_lock** only via +++ * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ () +++ * helpers. Loading or storing data into the **struct +++ * bpf_spin_lock** *lock*\ **;** field of a map is not allowed. +++ * * To use the **bpf_spin_lock**\ () helper, the BTF description +++ * of the map value must be a struct and have **struct +++ * bpf_spin_lock** *anyname*\ **;** field at the top level. +++ * Nested lock inside another struct is not allowed. +++ * * The **struct bpf_spin_lock** *lock* field in a map value must +++ * be aligned on a multiple of 4 bytes in that value. +++ * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy +++ * the **bpf_spin_lock** field to user space. +++ * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from +++ * a BPF program, do not update the **bpf_spin_lock** field. +++ * * **bpf_spin_lock** cannot be on the stack or inside a +++ * networking packet (it can only be inside of a map values). +++ * * **bpf_spin_lock** is available to root only. +++ * * Tracing programs and socket filter programs cannot use +++ * **bpf_spin_lock**\ () due to insufficient preemption checks +++ * (but this may change in the future). +++ * * **bpf_spin_lock** is not allowed in inner maps of map-in-map. +++ * Return +++ * 0 +++ * +++ * int bpf_spin_unlock(struct bpf_spin_lock *lock) +++ * Description +++ * Release the *lock* previously locked by a call to +++ * **bpf_spin_lock**\ (\ *lock*\ ). +++ * Return +++ * 0 +++ * +++ * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) +++ * Description +++ * This helper gets a **struct bpf_sock** pointer such +++ * that all the fields in this **bpf_sock** can be accessed. +++ * Return +++ * A **struct bpf_sock** pointer on success, or **NULL** in +++ * case of failure. +++ * +++ * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk) +++ * Description +++ * This helper gets a **struct bpf_tcp_sock** pointer from a +++ * **struct bpf_sock** pointer. +++ * Return +++ * A **struct bpf_tcp_sock** pointer on success, or **NULL** in +++ * case of failure. 
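To make the **bpf_spin_lock**\ ()/**bpf_spin_unlock**\ () constraints listed above concrete (not part of the backported patch), here is a minimal sketch that protects a counter inside an array map value. It assumes a networking program type for which the spinlock is permitted (a TC classifier here), BTF-described maps via libbpf, and illustrative map/struct names.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Map value with the struct bpf_spin_lock at the top level, as required. */
struct counter_val {
    struct bpf_spin_lock lock;
    __u64 packets;
};

struct {
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(max_entries, 1);
    __type(key, __u32);
    __type(value, struct counter_val);
} counters SEC(".maps");

SEC("classifier")
int count_locked(struct __sk_buff *skb)
{
    __u32 key = 0;
    struct counter_val *val;

    val = bpf_map_lookup_elem(&counters, &key);
    if (!val)
        return 0;

    /* Take the lock, update the protected field, and release the lock on
     * every path before returning, as the constraints above require. */
    bpf_spin_lock(&val->lock);
    val->packets++;
    bpf_spin_unlock(&val->lock);

    return 0; /* TC_ACT_OK */
}

char _license[] SEC("license") = "GPL";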
+++ *
+++ * int bpf_skb_ecn_set_ce(struct sk_buff *skb)
+++ * Description
+++ * Set ECN (Explicit Congestion Notification) field of IP header
+++ * to **CE** (Congestion Encountered) if current value is **ECT**
+++ * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6
+++ * and IPv4.
+++ * Return
+++ * 1 if the **CE** flag is set (either by the current helper call
+++ * or because it was already present), 0 if it is not set.
+++ *
+++ * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk)
+++ * Description
+++ * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state.
+++ * **bpf_sk_release**\ () is unnecessary and not allowed.
+++ * Return
+++ * A **struct bpf_sock** pointer on success, or **NULL** in
+++ * case of failure.
+++ *
+++ * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+++ * Description
+++ * Look for TCP socket matching *tuple*, optionally in a child
+++ * network namespace *netns*. The return value must be checked,
+++ * and if non-**NULL**, released via **bpf_sk_release**\ ().
+++ *
+++ * This function is identical to **bpf_sk_lookup_tcp**\ (), except
+++ * that it also returns timewait or request sockets. Use
+++ * **bpf_sk_fullsock**\ () or **bpf_tcp_sock**\ () to access the
+++ * full structure.
+++ *
+++ * This helper is available only if the kernel was compiled with
+++ * **CONFIG_NET** configuration option.
+++ * Return
+++ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+++ * For sockets with reuseport option, the **struct bpf_sock**
+++ * result is from *reuse*\ **->socks**\ [] using the hash of the
+++ * tuple.
+++ *
+++ * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+++ * Description
+++ * Check whether *iph* and *th* contain a valid SYN cookie ACK for
+++ * the listening socket in *sk*.
+++ *
+++ * *iph* points to the start of the IPv4 or IPv6 header, while
+++ * *iph_len* contains **sizeof**\ (**struct iphdr**) or
+++ * **sizeof**\ (**struct ip6hdr**).
+++ *
+++ * *th* points to the start of the TCP header, while *th_len*
+++ * contains **sizeof**\ (**struct tcphdr**).
+++ *
+++ * Return
+++ * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
+++ * error otherwise.
+++ *
+++ * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags)
+++ * Description
+++ * Get name of sysctl in /proc/sys/ and copy it into provided by
+++ * program buffer *buf* of size *buf_len*.
+++ *
+++ * The buffer is always NUL terminated, unless it's zero-sized.
+++ *
+++ * If *flags* is zero, full name (e.g. "net/ipv4/tcp_mem") is
+++ * copied. Use **BPF_F_SYSCTL_BASE_NAME** flag to copy base name
+++ * only (e.g. "tcp_mem").
+++ * Return
+++ * Number of characters copied (not including the trailing NUL).
+++ *
+++ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
+++ * truncated name in this case).
+++ *
+++ * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+++ * Description
+++ * Get current value of sysctl as it is presented in /proc/sys
+++ * (incl. newline, etc), and copy it as a string into provided
+++ * by program buffer *buf* of size *buf_len*.
+++ *
+++ * The whole value is copied, no matter what file position user
+++ * space issued e.g. sys_read at.
+++ *
+++ * The buffer is always NUL terminated, unless it's zero-sized.
+++ * Return
+++ * Number of characters copied (not including the trailing NUL).
+++ * +++ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain +++ * truncated name in this case). +++ * +++ * **-EINVAL** if current value was unavailable, e.g. because +++ * sysctl is uninitialized and read returns -EIO for it. +++ * +++ * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) +++ * Description +++ * Get new value being written by user space to sysctl (before +++ * the actual write happens) and copy it as a string into +++ * provided by program buffer *buf* of size *buf_len*. +++ * +++ * User space may write new value at file position > 0. +++ * +++ * The buffer is always NUL terminated, unless it's zero-sized. +++ * Return +++ * Number of character copied (not including the trailing NUL). +++ * +++ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain +++ * truncated name in this case). +++ * +++ * **-EINVAL** if sysctl is being read. +++ * +++ * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) +++ * Description +++ * Override new value being written by user space to sysctl with +++ * value provided by program in buffer *buf* of size *buf_len*. +++ * +++ * *buf* should contain a string in same form as provided by user +++ * space on sysctl write. +++ * +++ * User space may write new value at file position > 0. To override +++ * the whole sysctl value file position should be set to zero. +++ * Return +++ * 0 on success. +++ * +++ * **-E2BIG** if the *buf_len* is too big. +++ * +++ * **-EINVAL** if sysctl is being read. +++ * +++ * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) +++ * Description +++ * Convert the initial part of the string from buffer *buf* of +++ * size *buf_len* to a long integer according to the given base +++ * and save the result in *res*. +++ * +++ * The string may begin with an arbitrary amount of white space +++ * (as determined by **isspace**\ (3)) followed by a single +++ * optional '**-**' sign. +++ * +++ * Five least significant bits of *flags* encode base, other bits +++ * are currently unused. +++ * +++ * Base must be either 8, 10, 16 or 0 to detect it automatically +++ * similar to user space **strtol**\ (3). +++ * Return +++ * Number of characters consumed on success. Must be positive but +++ * no more than *buf_len*. +++ * +++ * **-EINVAL** if no valid digits were found or unsupported base +++ * was provided. +++ * +++ * **-ERANGE** if resulting value was out of range. +++ * +++ * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) +++ * Description +++ * Convert the initial part of the string from buffer *buf* of +++ * size *buf_len* to an unsigned long integer according to the +++ * given base and save the result in *res*. +++ * +++ * The string may begin with an arbitrary amount of white space +++ * (as determined by **isspace**\ (3)). +++ * +++ * Five least significant bits of *flags* encode base, other bits +++ * are currently unused. +++ * +++ * Base must be either 8, 10, 16 or 0 to detect it automatically +++ * similar to user space **strtoul**\ (3). +++ * Return +++ * Number of characters consumed on success. Must be positive but +++ * no more than *buf_len*. +++ * +++ * **-EINVAL** if no valid digits were found or unsupported base +++ * was provided. +++ * +++ * **-ERANGE** if resulting value was out of range. +++ * +++ * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags) +++ * Description +++ * Get a bpf-local-storage from a *sk*. 
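[Editor's note, not part of the patch] As a hypothetical illustration of the sysctl and string-conversion helpers documented above, a BPF_PROG_TYPE_CGROUP_SYSCTL program can parse the value being written with bpf_strtol() and veto it. This assumes libbpf's bpf_helpers.h; the program name and the limit of 4096 are invented for the example.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup/sysctl")
int sysctl_limit(struct bpf_sysctl *ctx)
{
	char buf[16] = {};
	long val = 0;

	if (!ctx->write)		/* reads are always allowed here */
		return 1;
	/* new value exactly as user space wrote it, NUL terminated */
	if (bpf_sysctl_get_new_value(ctx, buf, sizeof(buf)) < 0)
		return 0;
	/* base 0: auto-detect 8/10/16, as described above */
	if (bpf_strtol(buf, sizeof(buf), 0, &val) < 0)
		return 0;
	return val <= 4096 ? 1 : 0;	/* 1 = allow the write, 0 = reject it */
}

char _license[] SEC("license") = "GPL";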
+++ * +++ * Logically, it could be thought of getting the value from +++ * a *map* with *sk* as the **key**. From this +++ * perspective, the usage is not much different from +++ * **bpf_map_lookup_elem**\ (*map*, **&**\ *sk*) except this +++ * helper enforces the key must be a full socket and the map must +++ * be a **BPF_MAP_TYPE_SK_STORAGE** also. +++ * +++ * Underneath, the value is stored locally at *sk* instead of +++ * the *map*. The *map* is used as the bpf-local-storage +++ * "type". The bpf-local-storage "type" (i.e. the *map*) is +++ * searched against all bpf-local-storages residing at *sk*. +++ * +++ * An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be +++ * used such that a new bpf-local-storage will be +++ * created if one does not exist. *value* can be used +++ * together with **BPF_SK_STORAGE_GET_F_CREATE** to specify +++ * the initial value of a bpf-local-storage. If *value* is +++ * **NULL**, the new bpf-local-storage will be zero initialized. +++ * Return +++ * A bpf-local-storage pointer is returned on success. +++ * +++ * **NULL** if not found or there was an error in adding +++ * a new bpf-local-storage. +++ * +++ * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) +++ * Description +++ * Delete a bpf-local-storage from a *sk*. +++ * Return +++ * 0 on success. +++ * +++ * **-ENOENT** if the bpf-local-storage cannot be found. +++ * +++ * int bpf_send_signal(u32 sig) +++ * Description +++ * Send signal *sig* to the current task. +++ * Return +++ * 0 on success or successfully queued. +++ * +++ * **-EBUSY** if work queue under nmi is full. +++ * +++ * **-EINVAL** if *sig* is invalid. +++ * +++ * **-EPERM** if no permission to send the *sig*. +++ * +++ * **-EAGAIN** if bpf program can try again. +++ * +++ * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) +++ * Description +++ * Try to issue a SYN cookie for the packet with corresponding +++ * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. +++ * +++ * *iph* points to the start of the IPv4 or IPv6 header, while +++ * *iph_len* contains **sizeof**\ (**struct iphdr**) or +++ * **sizeof**\ (**struct ip6hdr**). +++ * +++ * *th* points to the start of the TCP header, while *th_len* +++ * contains the length of the TCP header. +++ * +++ * Return +++ * On success, lower 32 bits hold the generated SYN cookie in +++ * followed by 16 bits which hold the MSS value for that cookie, +++ * and the top 16 bits are unused. 
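[Editor's note, not part of the patch] A hypothetical sketch of the socket-local storage described above: a cgroup_skb program creates a per-socket counter on demand with BPF_SK_STORAGE_GET_F_CREATE. It assumes libbpf's BTF-defined map syntax; struct pkt_stats, sk_stats and count_rx are illustrative names only.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct pkt_stats {
	__u64 rx;
};

struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);	/* required for SK_STORAGE maps */
	__type(key, int);
	__type(value, struct pkt_stats);
} sk_stats SEC(".maps");

SEC("cgroup_skb/egress")
int count_rx(struct __sk_buff *skb)
{
	struct bpf_sock *sk = skb->sk;
	struct pkt_stats *st;

	if (!sk)
		return 1;
	sk = bpf_sk_fullsock(sk);	/* the storage helpers need a full socket */
	if (!sk)
		return 1;
	/* zero-initialized storage is created if none exists for this socket */
	st = bpf_sk_storage_get(&sk_stats, sk, NULL, BPF_SK_STORAGE_GET_F_CREATE);
	if (st)
		st->rx++;
	return 1;			/* 1 = let the packet through */
}

char _license[] SEC("license") = "GPL";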
+++ * +++ * On failure, the returned value is one of the following: +++ * +++ * **-EINVAL** SYN cookie cannot be issued due to error +++ * +++ * **-ENOENT** SYN cookie should not be issued (no SYN flood) +++ * +++ * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies +++ * +++ * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 +++ */ +++#define __BPF_FUNC_MAPPER(FN) \ +++ FN(unspec), \ +++ FN(map_lookup_elem), \ +++ FN(map_update_elem), \ +++ FN(map_delete_elem), \ +++ FN(probe_read), \ +++ FN(ktime_get_ns), \ +++ FN(trace_printk), \ +++ FN(get_prandom_u32), \ +++ FN(get_smp_processor_id), \ +++ FN(skb_store_bytes), \ +++ FN(l3_csum_replace), \ +++ FN(l4_csum_replace), \ +++ FN(tail_call), \ +++ FN(clone_redirect), \ +++ FN(get_current_pid_tgid), \ +++ FN(get_current_uid_gid), \ +++ FN(get_current_comm), \ +++ FN(get_cgroup_classid), \ +++ FN(skb_vlan_push), \ +++ FN(skb_vlan_pop), \ +++ FN(skb_get_tunnel_key), \ +++ FN(skb_set_tunnel_key), \ +++ FN(perf_event_read), \ +++ FN(redirect), \ +++ FN(get_route_realm), \ +++ FN(perf_event_output), \ +++ FN(skb_load_bytes), \ +++ FN(get_stackid), \ +++ FN(csum_diff), \ +++ FN(skb_get_tunnel_opt), \ +++ FN(skb_set_tunnel_opt), \ +++ FN(skb_change_proto), \ +++ FN(skb_change_type), \ +++ FN(skb_under_cgroup), \ +++ FN(get_hash_recalc), \ +++ FN(get_current_task), \ +++ FN(probe_write_user), \ +++ FN(current_task_under_cgroup), \ +++ FN(skb_change_tail), \ +++ FN(skb_pull_data), \ +++ FN(csum_update), \ +++ FN(set_hash_invalid), \ +++ FN(get_numa_node_id), \ +++ FN(skb_change_head), \ +++ FN(xdp_adjust_head), \ +++ FN(probe_read_str), \ +++ FN(get_socket_cookie), \ +++ FN(get_socket_uid), \ +++ FN(set_hash), \ +++ FN(setsockopt), \ +++ FN(skb_adjust_room), \ +++ FN(redirect_map), \ +++ FN(sk_redirect_map), \ +++ FN(sock_map_update), \ +++ FN(xdp_adjust_meta), \ +++ FN(perf_event_read_value), \ +++ FN(perf_prog_read_value), \ +++ FN(getsockopt), \ +++ FN(override_return), \ +++ FN(sock_ops_cb_flags_set), \ +++ FN(msg_redirect_map), \ +++ FN(msg_apply_bytes), \ +++ FN(msg_cork_bytes), \ +++ FN(msg_pull_data), \ +++ FN(bind), \ +++ FN(xdp_adjust_tail), \ +++ FN(skb_get_xfrm_state), \ +++ FN(get_stack), \ +++ FN(skb_load_bytes_relative), \ +++ FN(fib_lookup), \ +++ FN(sock_hash_update), \ +++ FN(msg_redirect_hash), \ +++ FN(sk_redirect_hash), \ +++ FN(lwt_push_encap), \ +++ FN(lwt_seg6_store_bytes), \ +++ FN(lwt_seg6_adjust_srh), \ +++ FN(lwt_seg6_action), \ +++ FN(rc_repeat), \ +++ FN(rc_keydown), \ +++ FN(skb_cgroup_id), \ +++ FN(get_current_cgroup_id), \ +++ FN(get_local_storage), \ +++ FN(sk_select_reuseport), \ +++ FN(skb_ancestor_cgroup_id), \ +++ FN(sk_lookup_tcp), \ +++ FN(sk_lookup_udp), \ +++ FN(sk_release), \ +++ FN(map_push_elem), \ +++ FN(map_pop_elem), \ +++ FN(map_peek_elem), \ +++ FN(msg_push_data), \ +++ FN(msg_pop_data), \ +++ FN(rc_pointer_rel), \ +++ FN(spin_lock), \ +++ FN(spin_unlock), \ +++ FN(sk_fullsock), \ +++ FN(tcp_sock), \ +++ FN(skb_ecn_set_ce), \ +++ FN(get_listener_sock), \ +++ FN(skc_lookup_tcp), \ +++ FN(tcp_check_syncookie), \ +++ FN(sysctl_get_name), \ +++ FN(sysctl_get_current_value), \ +++ FN(sysctl_get_new_value), \ +++ FN(sysctl_set_new_value), \ +++ FN(strtol), \ +++ FN(strtoul), \ +++ FN(sk_storage_get), \ +++ FN(sk_storage_delete), \ +++ FN(send_signal), \ +++ FN(tcp_gen_syncookie), +++ ++ /* integer value in 'imm' field of BPF_CALL instruction selects which helper ++ * function eBPF program intends to call ++ */ +++#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x ++ enum bpf_func_id { ++- 
BPF_FUNC_unspec, ++- BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */ ++- BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */ ++- BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */ ++- BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void *src) */ ++- BPF_FUNC_ktime_get_ns, /* u64 bpf_ktime_get_ns(void) */ ++- BPF_FUNC_trace_printk, /* int bpf_trace_printk(const char *fmt, int fmt_size, ...) */ ++- BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */ ++- BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */ ++- ++- /** ++- * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet ++- * @skb: pointer to skb ++- * @offset: offset within packet from skb->mac_header ++- * @from: pointer where to copy bytes from ++- * @len: number of bytes to store into packet ++- * @flags: bit 0 - if true, recompute skb->csum ++- * other bits - reserved ++- * Return: 0 on success ++- */ ++- BPF_FUNC_skb_store_bytes, ++- ++- /** ++- * l3_csum_replace(skb, offset, from, to, flags) - recompute IP checksum ++- * @skb: pointer to skb ++- * @offset: offset within packet where IP checksum is located ++- * @from: old value of header field ++- * @to: new value of header field ++- * @flags: bits 0-3 - size of header field ++- * other bits - reserved ++- * Return: 0 on success ++- */ ++- BPF_FUNC_l3_csum_replace, ++- ++- /** ++- * l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP checksum ++- * @skb: pointer to skb ++- * @offset: offset within packet where TCP/UDP checksum is located ++- * @from: old value of header field ++- * @to: new value of header field ++- * @flags: bits 0-3 - size of header field ++- * bit 4 - is pseudo header ++- * other bits - reserved ++- * Return: 0 on success ++- */ ++- BPF_FUNC_l4_csum_replace, +++ __BPF_FUNC_MAPPER(__BPF_ENUM_FN) +++ __BPF_FUNC_MAX_ID, +++}; +++#undef __BPF_ENUM_FN ++ ++- /** ++- * bpf_tail_call(ctx, prog_array_map, index) - jump into another BPF program ++- * @ctx: context pointer passed to next program ++- * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY ++- * @index: index inside array that selects specific program to run ++- * Return: 0 on success ++- */ ++- BPF_FUNC_tail_call, +++/* All flags used by eBPF helper functions, placed here. */ ++ ++- /** ++- * bpf_clone_redirect(skb, ifindex, flags) - redirect to another netdev ++- * @skb: pointer to skb ++- * @ifindex: ifindex of the net device ++- * @flags: bit 0 - if set, redirect to ingress instead of egress ++- * other bits - reserved ++- * Return: 0 on success ++- */ ++- BPF_FUNC_clone_redirect, +++/* BPF_FUNC_skb_store_bytes flags. */ +++#define BPF_F_RECOMPUTE_CSUM (1ULL << 0) +++#define BPF_F_INVALIDATE_HASH (1ULL << 1) ++ ++- /** ++- * u64 bpf_get_current_pid_tgid(void) ++- * Return: current->tgid << 32 | current->pid ++- */ ++- BPF_FUNC_get_current_pid_tgid, +++/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags. +++ * First 4 bits are for passing the header field size. +++ */ +++#define BPF_F_HDR_FIELD_MASK 0xfULL ++ ++- /** ++- * u64 bpf_get_current_uid_gid(void) ++- * Return: current_gid << 32 | current_uid ++- */ ++- BPF_FUNC_get_current_uid_gid, +++/* BPF_FUNC_l4_csum_replace flags. */ +++#define BPF_F_PSEUDO_HDR (1ULL << 4) +++#define BPF_F_MARK_MANGLED_0 (1ULL << 5) +++#define BPF_F_MARK_ENFORCE (1ULL << 6) +++ +++/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. 
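[Editor's note, not part of the patch] One payoff of the X-macro form above is that other tables can be generated from the same helper list. A hypothetical user-space sketch, assuming the installed uapi linux/bpf.h exports __BPF_FUNC_MAPPER as added here; func_id_str is an invented name (the kernel's own disassembler uses the same pattern internally).

#include <stdio.h>
#include <linux/bpf.h>

#define __BPF_FUNC_STR_FN(x) [BPF_FUNC_ ## x] = "bpf_" #x
static const char * const func_id_str[] = {
	__BPF_FUNC_MAPPER(__BPF_FUNC_STR_FN)
};
#undef __BPF_FUNC_STR_FN

int main(void)
{
	/* prints "bpf_map_lookup_elem"; adding one FN() line to the mapper
	 * extends both enum bpf_func_id and this table in lockstep. */
	printf("%s\n", func_id_str[BPF_FUNC_map_lookup_elem]);
	return 0;
}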
*/ +++#define BPF_F_INGRESS (1ULL << 0) +++ +++/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ +++#define BPF_F_TUNINFO_IPV6 (1ULL << 0) +++ +++/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ +++#define BPF_F_SKIP_FIELD_MASK 0xffULL +++#define BPF_F_USER_STACK (1ULL << 8) +++/* flags used by BPF_FUNC_get_stackid only. */ +++#define BPF_F_FAST_STACK_CMP (1ULL << 9) +++#define BPF_F_REUSE_STACKID (1ULL << 10) +++/* flags used by BPF_FUNC_get_stack only. */ +++#define BPF_F_USER_BUILD_ID (1ULL << 11) +++ +++/* BPF_FUNC_skb_set_tunnel_key flags. */ +++#define BPF_F_ZERO_CSUM_TX (1ULL << 1) +++#define BPF_F_DONT_FRAGMENT (1ULL << 2) +++#define BPF_F_SEQ_NUMBER (1ULL << 3) ++ ++- /** ++- * bpf_get_current_comm(char *buf, int size_of_buf) ++- * stores current->comm into buf ++- * Return: 0 on success ++- */ ++- BPF_FUNC_get_current_comm, ++- ++- /** ++- * bpf_get_cgroup_classid(skb) - retrieve a proc's classid ++- * @skb: pointer to skb ++- * Return: classid if != 0 ++- */ ++- BPF_FUNC_get_cgroup_classid, ++- BPF_FUNC_skb_vlan_push, /* bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) */ ++- BPF_FUNC_skb_vlan_pop, /* bpf_skb_vlan_pop(skb) */ ++- ++- /** ++- * bpf_skb_[gs]et_tunnel_key(skb, key, size, flags) ++- * retrieve or populate tunnel metadata ++- * @skb: pointer to skb ++- * @key: pointer to 'struct bpf_tunnel_key' ++- * @size: size of 'struct bpf_tunnel_key' ++- * @flags: room for future extensions ++- * Retrun: 0 on success ++- */ ++- BPF_FUNC_skb_get_tunnel_key, ++- BPF_FUNC_skb_set_tunnel_key, ++- BPF_FUNC_perf_event_read, /* u64 bpf_perf_event_read(&map, index) */ ++- /** ++- * bpf_redirect(ifindex, flags) - redirect to another netdev ++- * @ifindex: ifindex of the net device ++- * @flags: bit 0 - if set, redirect to ingress instead of egress ++- * other bits - reserved ++- * Return: TC_ACT_REDIRECT ++- */ ++- BPF_FUNC_redirect, +++/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and +++ * BPF_FUNC_perf_event_read_value flags. +++ */ +++#define BPF_F_INDEX_MASK 0xffffffffULL +++#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK +++/* BPF_FUNC_perf_event_output for sk_buff input context. */ +++#define BPF_F_CTXLEN_MASK (0xfffffULL << 32) +++ +++/* Current network namespace */ +++#define BPF_F_CURRENT_NETNS (-1L) +++ +++/* BPF_FUNC_skb_adjust_room flags. */ +++#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0) +++ +++#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff +++#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56 +++ +++#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1) +++#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2) +++#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3) +++#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4) +++#define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \ +++ BPF_ADJ_ROOM_ENCAP_L2_MASK) \ +++ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT) +++ +++/* BPF_FUNC_sysctl_get_name flags. */ +++#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0) +++ +++/* BPF_FUNC_sk_storage_get flags */ +++#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0) +++ +++/* Mode for BPF_FUNC_skb_adjust_room helper. */ +++enum bpf_adj_room_mode { +++ BPF_ADJ_ROOM_NET, +++ BPF_ADJ_ROOM_MAC, +++}; ++ ++- /** ++- * bpf_get_route_realm(skb) - retrieve a dst's tclassid ++- * @skb: pointer to skb ++- * Return: realm if != 0 ++- */ ++- BPF_FUNC_get_route_realm, +++/* Mode for BPF_FUNC_skb_load_bytes_relative helper. 
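[Editor's note, not part of the patch] BPF_F_CURRENT_CPU above is the usual way to index a BPF_MAP_TYPE_PERF_EVENT_ARRAY by the CPU the program runs on. A hypothetical tracing sketch, assuming libbpf's bpf_helpers.h; the event layout and tracepoint are chosen only for illustration.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct event {
	__u32 pid;
	char comm[16];
};

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} events SEC(".maps");

SEC("tracepoint/syscalls/sys_enter_execve")
int trace_execve(void *ctx)
{
	struct event e = {};

	e.pid = bpf_get_current_pid_tgid() >> 32;
	bpf_get_current_comm(e.comm, sizeof(e.comm));
	/* BPF_F_CURRENT_CPU selects the perf ring buffer of the current CPU */
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &e, sizeof(e));
	return 0;
}

char _license[] SEC("license") = "GPL";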
*/ +++enum bpf_hdr_start_off { +++ BPF_HDR_START_MAC, +++ BPF_HDR_START_NET, +++}; ++ ++- /** ++- * bpf_perf_event_output(ctx, map, index, data, size) - output perf raw sample ++- * @ctx: struct pt_regs* ++- * @map: pointer to perf_event_array map ++- * @index: index of event in the map ++- * @data: data on stack to be output as raw data ++- * @size: size of data ++- * Return: 0 on success ++- */ ++- BPF_FUNC_perf_event_output, ++- __BPF_FUNC_MAX_ID, +++/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */ +++enum bpf_lwt_encap_mode { +++ BPF_LWT_ENCAP_SEG6, +++ BPF_LWT_ENCAP_SEG6_INLINE, +++ BPF_LWT_ENCAP_IP, ++ }; ++ +++#define __bpf_md_ptr(type, name) \ +++union { \ +++ type name; \ +++ __u64 :64; \ +++} __attribute__((aligned(8))) +++ ++ /* user accessible mirror of in-kernel sk_buff. ++ * new fields can only be added to the end of this structure ++ */ ++@@ -291,11 +2985,632 @@ struct __sk_buff { ++ __u32 cb[5]; ++ __u32 hash; ++ __u32 tc_classid; +++ __u32 data; +++ __u32 data_end; +++ __u32 napi_id; +++ +++ /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */ +++ __u32 family; +++ __u32 remote_ip4; /* Stored in network byte order */ +++ __u32 local_ip4; /* Stored in network byte order */ +++ __u32 remote_ip6[4]; /* Stored in network byte order */ +++ __u32 local_ip6[4]; /* Stored in network byte order */ +++ __u32 remote_port; /* Stored in network byte order */ +++ __u32 local_port; /* stored in host byte order */ +++ /* ... here. */ +++ +++ __u32 data_meta; +++ __bpf_md_ptr(struct bpf_flow_keys *, flow_keys); +++ __u64 tstamp; +++ __u32 wire_len; +++ __u32 gso_segs; +++ __bpf_md_ptr(struct bpf_sock *, sk); ++ }; ++ ++ struct bpf_tunnel_key { ++ __u32 tunnel_id; ++- __u32 remote_ipv4; +++ union { +++ __u32 remote_ipv4; +++ __u32 remote_ipv6[4]; +++ }; +++ __u8 tunnel_tos; +++ __u8 tunnel_ttl; +++ __u16 tunnel_ext; /* Padding, future use. */ +++ __u32 tunnel_label; +++}; +++ +++/* user accessible mirror of in-kernel xfrm_state. +++ * new fields can only be added to the end of this structure +++ */ +++struct bpf_xfrm_state { +++ __u32 reqid; +++ __u32 spi; /* Stored in network byte order */ +++ __u16 family; +++ __u16 ext; /* Padding, future use. */ +++ union { +++ __u32 remote_ipv4; /* Stored in network byte order */ +++ __u32 remote_ipv6[4]; /* Stored in network byte order */ +++ }; +++}; +++ +++/* Generic BPF return codes which all BPF program types may support. +++ * The values are binary compatible with their TC_ACT_* counter-part to +++ * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT +++ * programs. +++ * +++ * XDP is handled seprately, see XDP_*. +++ */ +++enum bpf_ret_code { +++ BPF_OK = 0, +++ /* 1 reserved */ +++ BPF_DROP = 2, +++ /* 3-6 reserved */ +++ BPF_REDIRECT = 7, +++ /* >127 are reserved for prog type specific return codes. +++ * +++ * BPF_LWT_REROUTE: used by BPF_PROG_TYPE_LWT_IN and +++ * BPF_PROG_TYPE_LWT_XMIT to indicate that skb had been +++ * changed and should be routed based on its new L3 header. +++ * (This is an L3 redirect, as opposed to L2 redirect +++ * represented by BPF_REDIRECT above). 
+++ */ +++ BPF_LWT_REROUTE = 128, +++}; +++ +++struct bpf_sock { +++ __u32 bound_dev_if; +++ __u32 family; +++ __u32 type; +++ __u32 protocol; +++ __u32 mark; +++ __u32 priority; +++ /* IP address also allows 1 and 2 bytes access */ +++ __u32 src_ip4; +++ __u32 src_ip6[4]; +++ __u32 src_port; /* host byte order */ +++ __u32 dst_port; /* network byte order */ +++ __u32 dst_ip4; +++ __u32 dst_ip6[4]; +++ __u32 state; +++}; +++ +++struct bpf_tcp_sock { +++ __u32 snd_cwnd; /* Sending congestion window */ +++ __u32 srtt_us; /* smoothed round trip time << 3 in usecs */ +++ __u32 rtt_min; +++ __u32 snd_ssthresh; /* Slow start size threshold */ +++ __u32 rcv_nxt; /* What we want to receive next */ +++ __u32 snd_nxt; /* Next sequence we send */ +++ __u32 snd_una; /* First byte we want an ack for */ +++ __u32 mss_cache; /* Cached effective mss, not including SACKS */ +++ __u32 ecn_flags; /* ECN status bits. */ +++ __u32 rate_delivered; /* saved rate sample: packets delivered */ +++ __u32 rate_interval_us; /* saved rate sample: time elapsed */ +++ __u32 packets_out; /* Packets which are "in flight" */ +++ __u32 retrans_out; /* Retransmitted packets out */ +++ __u32 total_retrans; /* Total retransmits for entire connection */ +++ __u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn +++ * total number of segments in. +++ */ +++ __u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn +++ * total number of data segments in. +++ */ +++ __u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut +++ * The total number of segments sent. +++ */ +++ __u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut +++ * total number of data segments sent. +++ */ +++ __u32 lost_out; /* Lost packets */ +++ __u32 sacked_out; /* SACK'd packets */ +++ __u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived +++ * sum(delta(rcv_nxt)), or how many bytes +++ * were acked. +++ */ +++ __u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked +++ * sum(delta(snd_una)), or how many bytes +++ * were acked. +++ */ +++ __u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups +++ * total number of DSACK blocks received +++ */ +++ __u32 delivered; /* Total data packets delivered incl. rexmits */ +++ __u32 delivered_ce; /* Like the above but only ECE marked packets */ +++ __u32 icsk_retransmits; /* Number of unrecovered [RTO] timeouts */ +++}; +++ +++struct bpf_sock_tuple { +++ union { +++ struct { +++ __be32 saddr; +++ __be32 daddr; +++ __be16 sport; +++ __be16 dport; +++ } ipv4; +++ struct { +++ __be32 saddr[4]; +++ __be32 daddr[4]; +++ __be16 sport; +++ __be16 dport; +++ } ipv6; +++ }; +++}; +++ +++struct bpf_xdp_sock { +++ __u32 queue_id; +++}; +++ +++#define XDP_PACKET_HEADROOM 256 +++ +++/* User return codes for XDP prog type. +++ * A valid XDP program must return one of these defined values. All other +++ * return codes are reserved for future use. Unknown return codes will +++ * result in packet drops and a warning via bpf_warn_invalid_xdp_action(). 
+++ */ +++enum xdp_action { +++ XDP_ABORTED = 0, +++ XDP_DROP, +++ XDP_PASS, +++ XDP_TX, +++ XDP_REDIRECT, +++}; +++ +++/* user accessible metadata for XDP packet hook +++ * new fields must be added to the end of this structure +++ */ +++struct xdp_md { +++ __u32 data; +++ __u32 data_end; +++ __u32 data_meta; +++ /* Below access go through struct xdp_rxq_info */ +++ __u32 ingress_ifindex; /* rxq->dev->ifindex */ +++ __u32 rx_queue_index; /* rxq->queue_index */ +++}; +++ +++enum sk_action { +++ SK_DROP = 0, +++ SK_PASS, +++}; +++ +++/* user accessible metadata for SK_MSG packet hook, new fields must +++ * be added to the end of this structure +++ */ +++struct sk_msg_md { +++ __bpf_md_ptr(void *, data); +++ __bpf_md_ptr(void *, data_end); +++ +++ __u32 family; +++ __u32 remote_ip4; /* Stored in network byte order */ +++ __u32 local_ip4; /* Stored in network byte order */ +++ __u32 remote_ip6[4]; /* Stored in network byte order */ +++ __u32 local_ip6[4]; /* Stored in network byte order */ +++ __u32 remote_port; /* Stored in network byte order */ +++ __u32 local_port; /* stored in host byte order */ +++ __u32 size; /* Total size of sk_msg */ +++}; +++ +++struct sk_reuseport_md { +++ /* +++ * Start of directly accessible data. It begins from +++ * the tcp/udp header. +++ */ +++ __bpf_md_ptr(void *, data); +++ /* End of directly accessible data */ +++ __bpf_md_ptr(void *, data_end); +++ /* +++ * Total length of packet (starting from the tcp/udp header). +++ * Note that the directly accessible bytes (data_end - data) +++ * could be less than this "len". Those bytes could be +++ * indirectly read by a helper "bpf_skb_load_bytes()". +++ */ +++ __u32 len; +++ /* +++ * Eth protocol in the mac header (network byte order). e.g. +++ * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD) +++ */ +++ __u32 eth_protocol; +++ __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */ +++ __u32 bind_inany; /* Is sock bound to an INANY address? 
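[Editor's note, not part of the patch] A hypothetical minimal XDP program showing the xdp_md context and the return codes above: it drops frames too short to carry an Ethernet header and passes everything else. Assumes libbpf's bpf_helpers.h.

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_min_len(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;

	/* the verifier requires this bounds check before touching the packet */
	if (data + sizeof(struct ethhdr) > data_end)
		return XDP_DROP;
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";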
*/ +++ __u32 hash; /* A hash of the packet 4 tuples */ +++}; +++ +++#define BPF_TAG_SIZE 8 +++ +++struct bpf_prog_info { +++ __u32 type; +++ __u32 id; +++ __u8 tag[BPF_TAG_SIZE]; +++ __u32 jited_prog_len; +++ __u32 xlated_prog_len; +++ __aligned_u64 jited_prog_insns; +++ __aligned_u64 xlated_prog_insns; +++ __u64 load_time; /* ns since boottime */ +++ __u32 created_by_uid; +++ __u32 nr_map_ids; +++ __aligned_u64 map_ids; +++ char name[BPF_OBJ_NAME_LEN]; +++ __u32 ifindex; +++ __u32 gpl_compatible:1; +++ __u32 :31; /* alignment pad */ +++ __u64 netns_dev; +++ __u64 netns_ino; +++ __u32 nr_jited_ksyms; +++ __u32 nr_jited_func_lens; +++ __aligned_u64 jited_ksyms; +++ __aligned_u64 jited_func_lens; +++ __u32 btf_id; +++ __u32 func_info_rec_size; +++ __aligned_u64 func_info; +++ __u32 nr_func_info; +++ __u32 nr_line_info; +++ __aligned_u64 line_info; +++ __aligned_u64 jited_line_info; +++ __u32 nr_jited_line_info; +++ __u32 line_info_rec_size; +++ __u32 jited_line_info_rec_size; +++ __u32 nr_prog_tags; +++ __aligned_u64 prog_tags; +++ __u64 run_time_ns; +++ __u64 run_cnt; +++} __attribute__((aligned(8))); +++ +++struct bpf_map_info { +++ __u32 type; +++ __u32 id; +++ __u32 key_size; +++ __u32 value_size; +++ __u32 max_entries; +++ __u32 map_flags; +++ char name[BPF_OBJ_NAME_LEN]; +++ __u32 ifindex; +++ __u32 :32; +++ __u64 netns_dev; +++ __u64 netns_ino; +++ __u32 btf_id; +++ __u32 btf_key_type_id; +++ __u32 btf_value_type_id; +++} __attribute__((aligned(8))); +++ +++struct bpf_btf_info { +++ __aligned_u64 btf; +++ __u32 btf_size; +++ __u32 id; +++} __attribute__((aligned(8))); +++ +++/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed +++ * by user and intended to be used by socket (e.g. to bind to, depends on +++ * attach attach type). +++ */ +++struct bpf_sock_addr { +++ __u32 user_family; /* Allows 4-byte read, but no write. */ +++ __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write. +++ * Stored in network byte order. +++ */ +++ __u32 user_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write. +++ * Stored in network byte order. +++ */ +++ __u32 user_port; /* Allows 4-byte read and write. +++ * Stored in network byte order +++ */ +++ __u32 family; /* Allows 4-byte read, but no write */ +++ __u32 type; /* Allows 4-byte read, but no write */ +++ __u32 protocol; /* Allows 4-byte read, but no write */ +++ __u32 msg_src_ip4; /* Allows 1,2,4-byte read and 4-byte write. +++ * Stored in network byte order. +++ */ +++ __u32 msg_src_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write. +++ * Stored in network byte order. +++ */ +++ __bpf_md_ptr(struct bpf_sock *, sk); +++}; +++ +++/* User bpf_sock_ops struct to access socket values and specify request ops +++ * and their replies. +++ * Some of this fields are in network (bigendian) byte order and may need +++ * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h). 
+++ * New fields can only be added at the end of this structure +++ */ +++struct bpf_sock_ops { +++ __u32 op; +++ union { +++ __u32 args[4]; /* Optionally passed to bpf program */ +++ __u32 reply; /* Returned by bpf program */ +++ __u32 replylong[4]; /* Optionally returned by bpf prog */ +++ }; +++ __u32 family; +++ __u32 remote_ip4; /* Stored in network byte order */ +++ __u32 local_ip4; /* Stored in network byte order */ +++ __u32 remote_ip6[4]; /* Stored in network byte order */ +++ __u32 local_ip6[4]; /* Stored in network byte order */ +++ __u32 remote_port; /* Stored in network byte order */ +++ __u32 local_port; /* stored in host byte order */ +++ __u32 is_fullsock; /* Some TCP fields are only valid if +++ * there is a full socket. If not, the +++ * fields read as zero. +++ */ +++ __u32 snd_cwnd; +++ __u32 srtt_us; /* Averaged RTT << 3 in usecs */ +++ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */ +++ __u32 state; +++ __u32 rtt_min; +++ __u32 snd_ssthresh; +++ __u32 rcv_nxt; +++ __u32 snd_nxt; +++ __u32 snd_una; +++ __u32 mss_cache; +++ __u32 ecn_flags; +++ __u32 rate_delivered; +++ __u32 rate_interval_us; +++ __u32 packets_out; +++ __u32 retrans_out; +++ __u32 total_retrans; +++ __u32 segs_in; +++ __u32 data_segs_in; +++ __u32 segs_out; +++ __u32 data_segs_out; +++ __u32 lost_out; +++ __u32 sacked_out; +++ __u32 sk_txhash; +++ __u64 bytes_received; +++ __u64 bytes_acked; +++ __bpf_md_ptr(struct bpf_sock *, sk); +++}; +++ +++/* Definitions for bpf_sock_ops_cb_flags */ +++#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) +++#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) +++#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) +++#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3) +++#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently +++ * supported cb flags +++ */ +++ +++/* List of known BPF sock_ops operators. +++ * New entries can only be added at the end +++ */ +++enum { +++ BPF_SOCK_OPS_VOID, +++ BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or +++ * -1 if default value should be used +++ */ +++ BPF_SOCK_OPS_RWND_INIT, /* Should return initial advertized +++ * window (in packets) or -1 if default +++ * value should be used +++ */ +++ BPF_SOCK_OPS_TCP_CONNECT_CB, /* Calls BPF program right before an +++ * active connection is initialized +++ */ +++ BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, /* Calls BPF program when an +++ * active connection is +++ * established +++ */ +++ BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, /* Calls BPF program when a +++ * passive connection is +++ * established +++ */ +++ BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control +++ * needs ECN +++ */ +++ BPF_SOCK_OPS_BASE_RTT, /* Get base RTT. The correct value is +++ * based on the path and may be +++ * dependent on the congestion control +++ * algorithm. In general it indicates +++ * a congestion threshold. RTTs above +++ * this indicate congestion +++ */ +++ BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered. +++ * Arg1: value of icsk_retransmits +++ * Arg2: value of icsk_rto +++ * Arg3: whether RTO has expired +++ */ +++ BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted. +++ * Arg1: sequence number of 1st byte +++ * Arg2: # segments +++ * Arg3: return value of +++ * tcp_transmit_skb (0 => success) +++ */ +++ BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state. +++ * Arg1: old_state +++ * Arg2: new_state +++ */ +++ BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after +++ * socket transition to LISTEN state. +++ */ +++ BPF_SOCK_OPS_RTT_CB, /* Called on every RTT. 
+++ */ +++}; +++ +++/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect +++ * changes between the TCP and BPF versions. Ideally this should never happen. +++ * If it does, we need to add code to convert them before calling +++ * the BPF sock_ops function. +++ */ +++enum { +++ BPF_TCP_ESTABLISHED = 1, +++ BPF_TCP_SYN_SENT, +++ BPF_TCP_SYN_RECV, +++ BPF_TCP_FIN_WAIT1, +++ BPF_TCP_FIN_WAIT2, +++ BPF_TCP_TIME_WAIT, +++ BPF_TCP_CLOSE, +++ BPF_TCP_CLOSE_WAIT, +++ BPF_TCP_LAST_ACK, +++ BPF_TCP_LISTEN, +++ BPF_TCP_CLOSING, /* Now a valid state */ +++ BPF_TCP_NEW_SYN_RECV, +++ +++ BPF_TCP_MAX_STATES /* Leave at the end! */ +++}; +++ +++#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ +++#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ +++ +++struct bpf_perf_event_value { +++ __u64 counter; +++ __u64 enabled; +++ __u64 running; +++}; +++ +++#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) +++#define BPF_DEVCG_ACC_READ (1ULL << 1) +++#define BPF_DEVCG_ACC_WRITE (1ULL << 2) +++ +++#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) +++#define BPF_DEVCG_DEV_CHAR (1ULL << 1) +++ +++struct bpf_cgroup_dev_ctx { +++ /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ +++ __u32 access_type; +++ __u32 major; +++ __u32 minor; +++}; +++ +++struct bpf_raw_tracepoint_args { +++ __u64 args[0]; +++}; +++ +++/* DIRECT: Skip the FIB rules and go to FIB table associated with device +++ * OUTPUT: Do lookup from egress perspective; default is ingress +++ */ +++#define BPF_FIB_LOOKUP_DIRECT (1U << 0) +++#define BPF_FIB_LOOKUP_OUTPUT (1U << 1) +++ +++enum { +++ BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ +++ BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */ +++ BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */ +++ BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */ +++ BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */ +++ BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */ +++ BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ +++ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ +++ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ +++}; +++ +++struct bpf_fib_lookup { +++ /* input: network family for lookup (AF_INET, AF_INET6) +++ * output: network family of egress nexthop +++ */ +++ __u8 family; +++ +++ /* set if lookup is to consider L4 data - e.g., FIB rules */ +++ __u8 l4_protocol; +++ __be16 sport; +++ __be16 dport; +++ +++ /* total length of packet from network header - used for MTU check */ +++ __u16 tot_len; +++ +++ /* input: L3 device index for lookup +++ * output: device index from FIB lookup +++ */ +++ __u32 ifindex; +++ +++ union { +++ /* inputs to lookup */ +++ __u8 tos; /* AF_INET */ +++ __be32 flowinfo; /* AF_INET6, flow_label + priority */ +++ +++ /* output: metric of fib result (IPv4/IPv6 only) */ +++ __u32 rt_metric; +++ }; +++ +++ union { +++ __be32 ipv4_src; +++ __u32 ipv6_src[4]; /* in6_addr; network order */ +++ }; +++ +++ /* input to bpf_fib_lookup, ipv{4,6}_dst is destination address in +++ * network header. 
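[Editor's note, not part of the patch] TCP_BPF_IW and the sock_ops callbacks above are typically combined in a BPF_PROG_TYPE_SOCK_OPS program attached to a cgroup. A hypothetical sketch, assuming libbpf's bpf_helpers.h, that raises the initial congestion window for new active connections; the value 20 and the SOL_TCP fallback define are illustrative.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#ifndef SOL_TCP
#define SOL_TCP 6	/* same value as IPPROTO_TCP */
#endif

SEC("sockops")
int set_initial_cwnd(struct bpf_sock_ops *skops)
{
	int iw = 20;

	switch (skops->op) {
	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
		/* TCP_BPF_IW: set the initial congestion window, in packets */
		bpf_setsockopt(skops, SOL_TCP, TCP_BPF_IW, &iw, sizeof(iw));
		break;
	default:
		break;
	}
	skops->reply = 0;	/* no error to report back */
	return 1;
}

char _license[] SEC("license") = "GPL";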
output: bpf_fib_lookup sets to gateway address +++ * if FIB lookup returns gateway route +++ */ +++ union { +++ __be32 ipv4_dst; +++ __u32 ipv6_dst[4]; /* in6_addr; network order */ +++ }; +++ +++ /* output */ +++ __be16 h_vlan_proto; +++ __be16 h_vlan_TCI; +++ __u8 smac[6]; /* ETH_ALEN */ +++ __u8 dmac[6]; /* ETH_ALEN */ +++}; +++ +++enum bpf_task_fd_type { +++ BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ +++ BPF_FD_TYPE_TRACEPOINT, /* tp name */ +++ BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */ +++ BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */ +++ BPF_FD_TYPE_UPROBE, /* filename + offset */ +++ BPF_FD_TYPE_URETPROBE, /* filename + offset */ +++}; +++ +++#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0) +++#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1) +++#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2) +++ +++struct bpf_flow_keys { +++ __u16 nhoff; +++ __u16 thoff; +++ __u16 addr_proto; /* ETH_P_* of valid addrs */ +++ __u8 is_frag; +++ __u8 is_first_frag; +++ __u8 is_encap; +++ __u8 ip_proto; +++ __be16 n_proto; +++ __be16 sport; +++ __be16 dport; +++ union { +++ struct { +++ __be32 ipv4_src; +++ __be32 ipv4_dst; +++ }; +++ struct { +++ __u32 ipv6_src[4]; /* in6_addr; network order */ +++ __u32 ipv6_dst[4]; /* in6_addr; network order */ +++ }; +++ }; +++ __u32 flags; +++ __be32 flow_label; +++}; +++ +++struct bpf_func_info { +++ __u32 insn_off; +++ __u32 type_id; +++}; +++ +++#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10) +++#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff) +++ +++struct bpf_line_info { +++ __u32 insn_off; +++ __u32 file_name_off; +++ __u32 line_off; +++ __u32 line_col; +++}; +++ +++struct bpf_spin_lock { +++ __u32 val; +++}; +++ +++struct bpf_sysctl { +++ __u32 write; /* Sysctl is being read (= 0) or written (= 1). +++ * Allows 1,2,4-byte read, but no write. +++ */ +++ __u32 file_pos; /* Sysctl file position to read from, write to. +++ * Allows 1,2,4-byte read an 4-byte write. +++ */ +++}; +++ +++struct bpf_sockopt { +++ __bpf_md_ptr(struct bpf_sock *, sk); +++ __bpf_md_ptr(void *, optval); +++ __bpf_md_ptr(void *, optval_end); +++ +++ __s32 level; +++ __s32 optname; +++ __s32 optlen; +++ __s32 retval; ++ }; ++ ++ #endif /* _UAPI__LINUX_BPF_H__ */ ++--- /dev/null +++++ b/include/uapi/linux/bpfilter.h ++@@ -0,0 +1,21 @@ +++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +++#ifndef _UAPI_LINUX_BPFILTER_H +++#define _UAPI_LINUX_BPFILTER_H +++ +++#include +++ +++enum { +++ BPFILTER_IPT_SO_SET_REPLACE = 64, +++ BPFILTER_IPT_SO_SET_ADD_COUNTERS = 65, +++ BPFILTER_IPT_SET_MAX, +++}; +++ +++enum { +++ BPFILTER_IPT_SO_GET_INFO = 64, +++ BPFILTER_IPT_SO_GET_ENTRIES = 65, +++ BPFILTER_IPT_SO_GET_REVISION_MATCH = 66, +++ BPFILTER_IPT_SO_GET_REVISION_TARGET = 67, +++ BPFILTER_IPT_GET_MAX, +++}; +++ +++#endif /* _UAPI_LINUX_BPFILTER_H */ ++--- /dev/null +++++ b/include/uapi/linux/bpf_perf_event.h ++@@ -0,0 +1,19 @@ +++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +++/* Copyright (c) 2016 Facebook +++ * +++ * This program is free software; you can redistribute it and/or +++ * modify it under the terms of version 2 of the GNU General Public +++ * License as published by the Free Software Foundation. 
+++ */ +++#ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__ +++#define _UAPI__LINUX_BPF_PERF_EVENT_H__ +++ +++#include +++ +++struct bpf_perf_event_data { +++ bpf_user_pt_regs_t regs; +++ __u64 sample_period; +++ __u64 addr; +++}; +++ +++#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */ ++--- /dev/null +++++ b/include/uapi/linux/btf.h ++@@ -0,0 +1,165 @@ +++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +++/* Copyright (c) 2018 Facebook */ +++#ifndef _UAPI__LINUX_BTF_H__ +++#define _UAPI__LINUX_BTF_H__ +++ +++#include +++ +++#define BTF_MAGIC 0xeB9F +++#define BTF_VERSION 1 +++ +++struct btf_header { +++ __u16 magic; +++ __u8 version; +++ __u8 flags; +++ __u32 hdr_len; +++ +++ /* All offsets are in bytes relative to the end of this header */ +++ __u32 type_off; /* offset of type section */ +++ __u32 type_len; /* length of type section */ +++ __u32 str_off; /* offset of string section */ +++ __u32 str_len; /* length of string section */ +++}; +++ +++/* Max # of type identifier */ +++#define BTF_MAX_TYPE 0x000fffff +++/* Max offset into the string section */ +++#define BTF_MAX_NAME_OFFSET 0x00ffffff +++/* Max # of struct/union/enum members or func args */ +++#define BTF_MAX_VLEN 0xffff +++ +++struct btf_type { +++ __u32 name_off; +++ /* "info" bits arrangement +++ * bits 0-15: vlen (e.g. # of struct's members) +++ * bits 16-23: unused +++ * bits 24-27: kind (e.g. int, ptr, array...etc) +++ * bits 28-30: unused +++ * bit 31: kind_flag, currently used by +++ * struct, union and fwd +++ */ +++ __u32 info; +++ /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC. +++ * "size" tells the size of the type it is describing. +++ * +++ * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, +++ * FUNC, FUNC_PROTO and VAR. +++ * "type" is a type_id referring to another type. +++ */ +++ union { +++ __u32 size; +++ __u32 type; +++ }; +++}; +++ +++#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f) +++#define BTF_INFO_VLEN(info) ((info) & 0xffff) +++#define BTF_INFO_KFLAG(info) ((info) >> 31) +++ +++#define BTF_KIND_UNKN 0 /* Unknown */ +++#define BTF_KIND_INT 1 /* Integer */ +++#define BTF_KIND_PTR 2 /* Pointer */ +++#define BTF_KIND_ARRAY 3 /* Array */ +++#define BTF_KIND_STRUCT 4 /* Struct */ +++#define BTF_KIND_UNION 5 /* Union */ +++#define BTF_KIND_ENUM 6 /* Enumeration */ +++#define BTF_KIND_FWD 7 /* Forward */ +++#define BTF_KIND_TYPEDEF 8 /* Typedef */ +++#define BTF_KIND_VOLATILE 9 /* Volatile */ +++#define BTF_KIND_CONST 10 /* Const */ +++#define BTF_KIND_RESTRICT 11 /* Restrict */ +++#define BTF_KIND_FUNC 12 /* Function */ +++#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ +++#define BTF_KIND_VAR 14 /* Variable */ +++#define BTF_KIND_DATASEC 15 /* Section */ +++#define BTF_KIND_MAX BTF_KIND_DATASEC +++#define NR_BTF_KINDS (BTF_KIND_MAX + 1) +++ +++/* For some specific BTF_KIND, "struct btf_type" is immediately +++ * followed by extra data. +++ */ +++ +++/* BTF_KIND_INT is followed by a u32 and the following +++ * is the 32 bits arrangement: +++ */ +++#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24) +++#define BTF_INT_OFFSET(VAL) (((VAL) & 0x00ff0000) >> 16) +++#define BTF_INT_BITS(VAL) ((VAL) & 0x000000ff) +++ +++/* Attributes stored in the BTF_INT_ENCODING */ +++#define BTF_INT_SIGNED (1 << 0) +++#define BTF_INT_CHAR (1 << 1) +++#define BTF_INT_BOOL (1 << 2) +++ +++/* BTF_KIND_ENUM is followed by multiple "struct btf_enum". +++ * The exact number of btf_enum is stored in the vlen (of the +++ * info in "struct btf_type"). 
+++ */ +++struct btf_enum { +++ __u32 name_off; +++ __s32 val; +++}; +++ +++/* BTF_KIND_ARRAY is followed by one "struct btf_array" */ +++struct btf_array { +++ __u32 type; +++ __u32 index_type; +++ __u32 nelems; +++}; +++ +++/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed +++ * by multiple "struct btf_member". The exact number +++ * of btf_member is stored in the vlen (of the info in +++ * "struct btf_type"). +++ */ +++struct btf_member { +++ __u32 name_off; +++ __u32 type; +++ /* If the type info kind_flag is set, the btf_member offset +++ * contains both member bitfield size and bit offset. The +++ * bitfield size is set for bitfield members. If the type +++ * info kind_flag is not set, the offset contains only bit +++ * offset. +++ */ +++ __u32 offset; +++}; +++ +++/* If the struct/union type info kind_flag is set, the +++ * following two macros are used to access bitfield_size +++ * and bit_offset from btf_member.offset. +++ */ +++#define BTF_MEMBER_BITFIELD_SIZE(val) ((val) >> 24) +++#define BTF_MEMBER_BIT_OFFSET(val) ((val) & 0xffffff) +++ +++/* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param". +++ * The exact number of btf_param is stored in the vlen (of the +++ * info in "struct btf_type"). +++ */ +++struct btf_param { +++ __u32 name_off; +++ __u32 type; +++}; +++ +++enum { +++ BTF_VAR_STATIC = 0, +++ BTF_VAR_GLOBAL_ALLOCATED, +++}; +++ +++/* BTF_KIND_VAR is followed by a single "struct btf_var" to describe +++ * additional information related to the variable such as its linkage. +++ */ +++struct btf_var { +++ __u32 linkage; +++}; +++ +++/* BTF_KIND_DATASEC is followed by multiple "struct btf_var_secinfo" +++ * to describe all BTF_KIND_VAR types it contains along with it's +++ * in-section offset as well as size. +++ */ +++struct btf_var_secinfo { +++ __u32 type; +++ __u32 offset; +++ __u32 size; +++}; +++ +++#endif /* _UAPI__LINUX_BTF_H__ */ ++--- a/kernel/bpf/arraymap.c +++++ b/kernel/bpf/arraymap.c ++@@ -1,78 +1,141 @@ +++// SPDX-License-Identifier: GPL-2.0-only ++ /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com ++- * ++- * This program is free software; you can redistribute it and/or ++- * modify it under the terms of version 2 of the GNU General Public ++- * License as published by the Free Software Foundation. ++- * ++- * This program is distributed in the hope that it will be useful, but ++- * WITHOUT ANY WARRANTY; without even the implied warranty of ++- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++- * General Public License for more details. 
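[Editor's note, not part of the patch] To make the info bit layout above concrete, here is a hypothetical user-space snippet that decodes a struct btf_type with the accessor macros this header defines; dump_btf_type and the sample values are invented for illustration.

#include <stdio.h>
#include <linux/btf.h>

/* Print the kind, vlen and kind_flag packed into btf_type.info. */
static void dump_btf_type(const struct btf_type *t)
{
	printf("kind=%u vlen=%u kind_flag=%u size_or_type=%u\n",
	       BTF_INFO_KIND(t->info),		/* bits 24-27 */
	       BTF_INFO_VLEN(t->info),		/* bits 0-15  */
	       BTF_INFO_KFLAG(t->info),		/* bit 31     */
	       t->size);			/* union with t->type */
}

int main(void)
{
	struct btf_type t = {
		.info = (BTF_KIND_STRUCT << 24) | 2,	/* a struct with 2 members */
		.size = 16,
	};

	dump_btf_type(&t);
	return 0;
}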
+++ * Copyright (c) 2016,2017 Facebook ++ */ ++ #include +++#include ++ #include ++-#include ++ #include ++ #include ++ #include ++ #include +++#include +++ +++#include "map_in_map.h" +++ +++#define ARRAY_CREATE_FLAG_MASK \ +++ (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK) +++ +++static void bpf_array_free_percpu(struct bpf_array *array) +++{ +++ int i; +++ +++ for (i = 0; i < array->map.max_entries; i++) { +++ free_percpu(array->pptrs[i]); +++ cond_resched(); +++ } +++} +++ +++static int bpf_array_alloc_percpu(struct bpf_array *array) +++{ +++ void __percpu *ptr; +++ int i; +++ +++ for (i = 0; i < array->map.max_entries; i++) { +++ ptr = __alloc_percpu_gfp(array->elem_size, 8, +++ GFP_USER | __GFP_NOWARN); +++ if (!ptr) { +++ bpf_array_free_percpu(array); +++ return -ENOMEM; +++ } +++ array->pptrs[i] = ptr; +++ cond_resched(); +++ } +++ +++ return 0; +++} ++ ++ /* Called from syscall */ ++-static struct bpf_map *array_map_alloc(union bpf_attr *attr) +++int array_map_alloc_check(union bpf_attr *attr) ++ { ++- struct bpf_array *array; ++- u32 elem_size, array_size; ++- u32 index_mask, max_entries; ++- bool unpriv = !capable(CAP_SYS_ADMIN); +++ bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; +++ int numa_node = bpf_map_attr_numa_node(attr); ++ ++ /* check sanity of attributes */ ++ if (attr->max_entries == 0 || attr->key_size != 4 || ++- attr->value_size == 0) ++- return ERR_PTR(-EINVAL); +++ attr->value_size == 0 || +++ attr->map_flags & ~ARRAY_CREATE_FLAG_MASK || +++ !bpf_map_flags_access_ok(attr->map_flags) || +++ (percpu && numa_node != NUMA_NO_NODE)) +++ return -EINVAL; ++ ++- if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1)) +++ if (attr->value_size > KMALLOC_MAX_SIZE) ++ /* if value_size is bigger, the user space won't be able to ++ * access the elements. ++ */ ++- return ERR_PTR(-E2BIG); +++ return -E2BIG; +++ +++ return 0; +++} +++ +++static struct bpf_map *array_map_alloc(union bpf_attr *attr) +++{ +++ bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; +++ int ret, numa_node = bpf_map_attr_numa_node(attr); +++ u32 elem_size, index_mask, max_entries; +++ bool unpriv = !capable(CAP_SYS_ADMIN); +++ u64 cost, array_size, mask64; +++ struct bpf_map_memory mem; +++ struct bpf_array *array; ++ ++ elem_size = round_up(attr->value_size, 8); ++ ++ max_entries = attr->max_entries; ++- index_mask = roundup_pow_of_two(max_entries) - 1; ++ ++- if (unpriv) +++ /* On 32 bit archs roundup_pow_of_two() with max_entries that has +++ * upper most bit set in u32 space is undefined behavior due to +++ * resulting 1U << 32, so do it manually here in u64 space. +++ */ +++ mask64 = fls_long(max_entries - 1); +++ mask64 = 1ULL << mask64; +++ mask64 -= 1; +++ +++ index_mask = mask64; +++ if (unpriv) { ++ /* round up array size to nearest power of 2, ++ * since cpu will speculate within index_mask limits ++ */ ++ max_entries = index_mask + 1; +++ /* Check for overflows. 
*/ +++ if (max_entries < attr->max_entries) +++ return ERR_PTR(-E2BIG); +++ } ++ ++- ++- /* check round_up into zero and u32 overflow */ ++- if (elem_size == 0 || ++- attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size) ++- return ERR_PTR(-ENOMEM); ++- ++- array_size = sizeof(*array) + max_entries * elem_size; +++ array_size = sizeof(*array); +++ if (percpu) +++ array_size += (u64) max_entries * sizeof(void *); +++ else +++ array_size += (u64) max_entries * elem_size; +++ +++ /* make sure there is no u32 overflow later in round_up() */ +++ cost = array_size; +++ if (percpu) +++ cost += (u64)attr->max_entries * elem_size * num_possible_cpus(); +++ +++ ret = bpf_map_charge_init(&mem, cost); +++ if (ret < 0) +++ return ERR_PTR(ret); ++ ++ /* allocate all map elements and zero-initialize them */ ++- array = kzalloc(array_size, GFP_USER | __GFP_NOWARN); +++ array = bpf_map_area_alloc(array_size, numa_node); ++ if (!array) { ++- array = vzalloc(array_size); ++- if (!array) ++- return ERR_PTR(-ENOMEM); +++ bpf_map_charge_finish(&mem); +++ return ERR_PTR(-ENOMEM); ++ } ++- ++ array->index_mask = index_mask; ++ array->map.unpriv_array = unpriv; ++ ++ /* copy mandatory map attributes */ ++- array->map.key_size = attr->key_size; ++- array->map.value_size = attr->value_size; ++- array->map.max_entries = attr->max_entries; ++- array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT; +++ bpf_map_init_from_attr(&array->map, attr); +++ bpf_map_charge_move(&array->map.memory, &mem); ++ array->elem_size = elem_size; ++ +++ if (percpu && bpf_array_alloc_percpu(array)) { +++ bpf_map_charge_finish(&array->map.memory); +++ bpf_map_area_free(array); +++ return ERR_PTR(-ENOMEM); +++ } +++ ++ return &array->map; ++ } ++ ++@@ -82,17 +145,115 @@ static void *array_map_lookup_elem(struc ++ struct bpf_array *array = container_of(map, struct bpf_array, map); ++ u32 index = *(u32 *)key; ++ ++- if (index >= array->map.max_entries) +++ if (unlikely(index >= array->map.max_entries)) ++ return NULL; ++ ++ return array->value + array->elem_size * (index & array->index_mask); ++ } ++ +++static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm, +++ u32 off) +++{ +++ struct bpf_array *array = container_of(map, struct bpf_array, map); +++ +++ if (map->max_entries != 1) +++ return -ENOTSUPP; +++ if (off >= map->value_size) +++ return -EINVAL; +++ +++ *imm = (unsigned long)array->value; +++ return 0; +++} +++ +++static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm, +++ u32 *off) +++{ +++ struct bpf_array *array = container_of(map, struct bpf_array, map); +++ u64 base = (unsigned long)array->value; +++ u64 range = array->elem_size; +++ +++ if (map->max_entries != 1) +++ return -ENOTSUPP; +++ if (imm < base || imm >= base + range) +++ return -ENOENT; +++ +++ *off = imm - base; +++ return 0; +++} +++ +++/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */ +++static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) +++{ +++ struct bpf_array *array = container_of(map, struct bpf_array, map); +++ struct bpf_insn *insn = insn_buf; +++ u32 elem_size = round_up(map->value_size, 8); +++ const int ret = BPF_REG_0; +++ const int map_ptr = BPF_REG_1; +++ const int index = BPF_REG_2; +++ +++ *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); +++ *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); +++ if (map->unpriv_array) { +++ *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4); +++ *insn++ = 
BPF_ALU32_IMM(BPF_AND, ret, array->index_mask); +++ } else { +++ *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3); +++ } +++ +++ if (is_power_of_2(elem_size)) { +++ *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size)); +++ } else { +++ *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size); +++ } +++ *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr); +++ *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); +++ *insn++ = BPF_MOV64_IMM(ret, 0); +++ return insn - insn_buf; +++} +++ +++/* Called from eBPF program */ +++static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) +++{ +++ struct bpf_array *array = container_of(map, struct bpf_array, map); +++ u32 index = *(u32 *)key; +++ +++ if (unlikely(index >= array->map.max_entries)) +++ return NULL; +++ +++ return this_cpu_ptr(array->pptrs[index & array->index_mask]); +++} +++ +++int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) +++{ +++ struct bpf_array *array = container_of(map, struct bpf_array, map); +++ u32 index = *(u32 *)key; +++ void __percpu *pptr; +++ int cpu, off = 0; +++ u32 size; +++ +++ if (unlikely(index >= array->map.max_entries)) +++ return -ENOENT; +++ +++ /* per_cpu areas are zero-filled and bpf programs can only +++ * access 'value_size' of them, so copying rounded areas +++ * will not leak any kernel data +++ */ +++ size = round_up(map->value_size, 8); +++ rcu_read_lock(); +++ pptr = array->pptrs[index & array->index_mask]; +++ for_each_possible_cpu(cpu) { +++ bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size); +++ off += size; +++ } +++ rcu_read_unlock(); +++ return 0; +++} +++ ++ /* Called from syscall */ ++ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key) ++ { ++ struct bpf_array *array = container_of(map, struct bpf_array, map); ++- u32 index = *(u32 *)key; +++ u32 index = key ? 
*(u32 *)key : U32_MAX; ++ u32 *next = (u32 *)next_key; ++ ++ if (index >= array->map.max_entries) { ++@@ -113,22 +274,73 @@ static int array_map_update_elem(struct ++ { ++ struct bpf_array *array = container_of(map, struct bpf_array, map); ++ u32 index = *(u32 *)key; +++ char *val; ++ ++- if (map_flags > BPF_EXIST) +++ if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST)) ++ /* unknown flags */ ++ return -EINVAL; ++ ++- if (index >= array->map.max_entries) +++ if (unlikely(index >= array->map.max_entries)) +++ /* all elements were pre-allocated, cannot insert a new one */ +++ return -E2BIG; +++ +++ if (unlikely(map_flags & BPF_NOEXIST)) +++ /* all elements already exist */ +++ return -EEXIST; +++ +++ if (unlikely((map_flags & BPF_F_LOCK) && +++ !map_value_has_spin_lock(map))) +++ return -EINVAL; +++ +++ if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { +++ memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]), +++ value, map->value_size); +++ } else { +++ val = array->value + +++ array->elem_size * (index & array->index_mask); +++ if (map_flags & BPF_F_LOCK) +++ copy_map_value_locked(map, val, value, false); +++ else +++ copy_map_value(map, val, value); +++ } +++ return 0; +++} +++ +++int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, +++ u64 map_flags) +++{ +++ struct bpf_array *array = container_of(map, struct bpf_array, map); +++ u32 index = *(u32 *)key; +++ void __percpu *pptr; +++ int cpu, off = 0; +++ u32 size; +++ +++ if (unlikely(map_flags > BPF_EXIST)) +++ /* unknown flags */ +++ return -EINVAL; +++ +++ if (unlikely(index >= array->map.max_entries)) ++ /* all elements were pre-allocated, cannot insert a new one */ ++ return -E2BIG; ++ ++- if (map_flags == BPF_NOEXIST) +++ if (unlikely(map_flags == BPF_NOEXIST)) ++ /* all elements already exist */ ++ return -EEXIST; ++ ++- memcpy(array->value + ++- array->elem_size * (index & array->index_mask), ++- value, map->value_size); +++ /* the user space will provide round_up(value_size, 8) bytes that +++ * will be copied into per-cpu area. bpf programs can only access +++ * value_size of it. 
During lookup the same extra bytes will be +++ * returned or zeros which were zero-filled by percpu_alloc, +++ * so no kernel data leaks possible +++ */ +++ size = round_up(map->value_size, 8); +++ rcu_read_lock(); +++ pptr = array->pptrs[index & array->index_mask]; +++ for_each_possible_cpu(cpu) { +++ bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size); +++ off += size; +++ } +++ rcu_read_unlock(); ++ return 0; ++ } ++ ++@@ -150,36 +362,124 @@ static void array_map_free(struct bpf_ma ++ */ ++ synchronize_rcu(); ++ ++- kvfree(array); +++ if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) +++ bpf_array_free_percpu(array); +++ +++ bpf_map_area_free(array); ++ } ++ ++-static const struct bpf_map_ops array_ops = { +++static void array_map_seq_show_elem(struct bpf_map *map, void *key, +++ struct seq_file *m) +++{ +++ void *value; +++ +++ rcu_read_lock(); +++ +++ value = array_map_lookup_elem(map, key); +++ if (!value) { +++ rcu_read_unlock(); +++ return; +++ } +++ +++ if (map->btf_key_type_id) +++ seq_printf(m, "%u: ", *(u32 *)key); +++ btf_type_seq_show(map->btf, map->btf_value_type_id, value, m); +++ seq_puts(m, "\n"); +++ +++ rcu_read_unlock(); +++} +++ +++static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key, +++ struct seq_file *m) +++{ +++ struct bpf_array *array = container_of(map, struct bpf_array, map); +++ u32 index = *(u32 *)key; +++ void __percpu *pptr; +++ int cpu; +++ +++ rcu_read_lock(); +++ +++ seq_printf(m, "%u: {\n", *(u32 *)key); +++ pptr = array->pptrs[index & array->index_mask]; +++ for_each_possible_cpu(cpu) { +++ seq_printf(m, "\tcpu%d: ", cpu); +++ btf_type_seq_show(map->btf, map->btf_value_type_id, +++ per_cpu_ptr(pptr, cpu), m); +++ seq_puts(m, "\n"); +++ } +++ seq_puts(m, "}\n"); +++ +++ rcu_read_unlock(); +++} +++ +++static int array_map_check_btf(const struct bpf_map *map, +++ const struct btf *btf, +++ const struct btf_type *key_type, +++ const struct btf_type *value_type) +++{ +++ u32 int_data; +++ +++ /* One exception for keyless BTF: .bss/.data/.rodata map */ +++ if (btf_type_is_void(key_type)) { +++ if (map->map_type != BPF_MAP_TYPE_ARRAY || +++ map->max_entries != 1) +++ return -EINVAL; +++ +++ if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC) +++ return -EINVAL; +++ +++ return 0; +++ } +++ +++ if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) +++ return -EINVAL; +++ +++ int_data = *(u32 *)(key_type + 1); +++ /* bpf array can only take a u32 key. This check makes sure +++ * that the btf matches the attr used during map_create. 
+++ */ +++ if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data)) +++ return -EINVAL; +++ +++ return 0; +++} +++ +++const struct bpf_map_ops array_map_ops = { +++ .map_alloc_check = array_map_alloc_check, ++ .map_alloc = array_map_alloc, ++ .map_free = array_map_free, ++ .map_get_next_key = array_map_get_next_key, ++ .map_lookup_elem = array_map_lookup_elem, ++ .map_update_elem = array_map_update_elem, ++ .map_delete_elem = array_map_delete_elem, +++ .map_gen_lookup = array_map_gen_lookup, +++ .map_direct_value_addr = array_map_direct_value_addr, +++ .map_direct_value_meta = array_map_direct_value_meta, +++ .map_seq_show_elem = array_map_seq_show_elem, +++ .map_check_btf = array_map_check_btf, ++ }; ++ ++-static struct bpf_map_type_list array_type __read_mostly = { ++- .ops = &array_ops, ++- .type = BPF_MAP_TYPE_ARRAY, +++const struct bpf_map_ops percpu_array_map_ops = { +++ .map_alloc_check = array_map_alloc_check, +++ .map_alloc = array_map_alloc, +++ .map_free = array_map_free, +++ .map_get_next_key = array_map_get_next_key, +++ .map_lookup_elem = percpu_array_map_lookup_elem, +++ .map_update_elem = array_map_update_elem, +++ .map_delete_elem = array_map_delete_elem, +++ .map_seq_show_elem = percpu_array_map_seq_show_elem, +++ .map_check_btf = array_map_check_btf, ++ }; ++ ++-static int __init register_array_map(void) ++-{ ++- bpf_register_map_type(&array_type); ++- return 0; ++-} ++-late_initcall(register_array_map); ++- ++-static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr) +++static int fd_array_map_alloc_check(union bpf_attr *attr) ++ { ++ /* only file descriptors can be stored in this type of map */ ++ if (attr->value_size != sizeof(u32)) ++- return ERR_PTR(-EINVAL); ++- return array_map_alloc(attr); +++ return -EINVAL; +++ /* Program read-only/write-only not supported for special maps yet. 
*/ +++ if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) +++ return -EINVAL; +++ return array_map_alloc_check(attr); ++ } ++ ++ static void fd_array_map_free(struct bpf_map *map) ++@@ -192,17 +492,38 @@ static void fd_array_map_free(struct bpf ++ /* make sure it's empty */ ++ for (i = 0; i < array->map.max_entries; i++) ++ BUG_ON(array->ptrs[i] != NULL); ++- kvfree(array); +++ +++ bpf_map_area_free(array); ++ } ++ ++ static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key) ++ { ++- return NULL; +++ return ERR_PTR(-EOPNOTSUPP); +++} +++ +++/* only called from syscall */ +++int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value) +++{ +++ void **elem, *ptr; +++ int ret = 0; +++ +++ if (!map->ops->map_fd_sys_lookup_elem) +++ return -ENOTSUPP; +++ +++ rcu_read_lock(); +++ elem = array_map_lookup_elem(map, key); +++ if (elem && (ptr = READ_ONCE(*elem))) +++ *value = map->ops->map_fd_sys_lookup_elem(ptr); +++ else +++ ret = -ENOENT; +++ rcu_read_unlock(); +++ +++ return ret; ++ } ++ ++ /* only called from syscall */ ++-static int fd_array_map_update_elem(struct bpf_map *map, void *key, ++- void *value, u64 map_flags) +++int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, +++ void *key, void *value, u64 map_flags) ++ { ++ struct bpf_array *array = container_of(map, struct bpf_array, map); ++ void *new_ptr, *old_ptr; ++@@ -215,7 +536,7 @@ static int fd_array_map_update_elem(stru ++ return -E2BIG; ++ ++ ufd = *(u32 *)value; ++- new_ptr = map->ops->map_fd_get_ptr(map, ufd); +++ new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd); ++ if (IS_ERR(new_ptr)) ++ return PTR_ERR(new_ptr); ++ ++@@ -244,10 +565,12 @@ static int fd_array_map_delete_elem(stru ++ } ++ } ++ ++-static void *prog_fd_array_get_ptr(struct bpf_map *map, int fd) +++static void *prog_fd_array_get_ptr(struct bpf_map *map, +++ struct file *map_file, int fd) ++ { ++ struct bpf_array *array = container_of(map, struct bpf_array, map); ++ struct bpf_prog *prog = bpf_prog_get(fd); +++ ++ if (IS_ERR(prog)) ++ return prog; ++ ++@@ -255,18 +578,22 @@ static void *prog_fd_array_get_ptr(struc ++ bpf_prog_put(prog); ++ return ERR_PTR(-EINVAL); ++ } +++ ++ return prog; ++ } ++ ++ static void prog_fd_array_put_ptr(void *ptr) ++ { ++- struct bpf_prog *prog = ptr; +++ bpf_prog_put(ptr); +++} ++ ++- bpf_prog_put_rcu(prog); +++static u32 prog_fd_array_sys_lookup_elem(void *ptr) +++{ +++ return ((struct bpf_prog *)ptr)->aux->id; ++ } ++ ++ /* decrement refcnt of all bpf_progs that are stored in this map */ ++-void bpf_fd_array_map_clear(struct bpf_map *map) +++static void bpf_fd_array_map_clear(struct bpf_map *map) ++ { ++ struct bpf_array *array = container_of(map, struct bpf_array, map); ++ int i; ++@@ -275,91 +602,208 @@ void bpf_fd_array_map_clear(struct bpf_m ++ fd_array_map_delete_elem(map, &i); ++ } ++ ++-static const struct bpf_map_ops prog_array_ops = { ++- .map_alloc = fd_array_map_alloc, +++static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key, +++ struct seq_file *m) +++{ +++ void **elem, *ptr; +++ u32 prog_id; +++ +++ rcu_read_lock(); +++ +++ elem = array_map_lookup_elem(map, key); +++ if (elem) { +++ ptr = READ_ONCE(*elem); +++ if (ptr) { +++ seq_printf(m, "%u: ", *(u32 *)key); +++ prog_id = prog_fd_array_sys_lookup_elem(ptr); +++ btf_type_seq_show(map->btf, map->btf_value_type_id, +++ &prog_id, m); +++ seq_puts(m, "\n"); +++ } +++ } +++ +++ rcu_read_unlock(); +++} +++ +++const struct bpf_map_ops prog_array_map_ops = { +++ .map_alloc_check = 
fd_array_map_alloc_check, +++ .map_alloc = array_map_alloc, ++ .map_free = fd_array_map_free, ++ .map_get_next_key = array_map_get_next_key, ++ .map_lookup_elem = fd_array_map_lookup_elem, ++- .map_update_elem = fd_array_map_update_elem, ++ .map_delete_elem = fd_array_map_delete_elem, ++ .map_fd_get_ptr = prog_fd_array_get_ptr, ++ .map_fd_put_ptr = prog_fd_array_put_ptr, +++ .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, +++ .map_release_uref = bpf_fd_array_map_clear, +++ .map_seq_show_elem = prog_array_map_seq_show_elem, ++ }; ++ ++-static struct bpf_map_type_list prog_array_type __read_mostly = { ++- .ops = &prog_array_ops, ++- .type = BPF_MAP_TYPE_PROG_ARRAY, ++-}; +++static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, +++ struct file *map_file) +++{ +++ struct bpf_event_entry *ee; +++ +++ ee = kzalloc(sizeof(*ee), GFP_ATOMIC); +++ if (ee) { +++ ee->event = perf_file->private_data; +++ ee->perf_file = perf_file; +++ ee->map_file = map_file; +++ } ++ ++-static int __init register_prog_array_map(void) +++ return ee; +++} +++ +++static void __bpf_event_entry_free(struct rcu_head *rcu) ++ { ++- bpf_register_map_type(&prog_array_type); ++- return 0; +++ struct bpf_event_entry *ee; +++ +++ ee = container_of(rcu, struct bpf_event_entry, rcu); +++ fput(ee->perf_file); +++ kfree(ee); ++ } ++-late_initcall(register_prog_array_map); ++ ++-static void perf_event_array_map_free(struct bpf_map *map) +++static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee) ++ { ++- bpf_fd_array_map_clear(map); ++- fd_array_map_free(map); +++ call_rcu(&ee->rcu, __bpf_event_entry_free); ++ } ++ ++-static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd) +++static void *perf_event_fd_array_get_ptr(struct bpf_map *map, +++ struct file *map_file, int fd) ++ { +++ struct bpf_event_entry *ee; ++ struct perf_event *event; ++- const struct perf_event_attr *attr; +++ struct file *perf_file; +++ u64 value; ++ ++ event = perf_event_get(fd); ++ if (IS_ERR(event)) ++ return event; ++ ++- attr = perf_event_attrs(event); ++- if (IS_ERR(attr)) ++- goto err; ++- ++- if (attr->inherit) ++- goto err; +++ value = perf_event_read_local(event); ++ ++- if (attr->type == PERF_TYPE_RAW) ++- return event; ++- ++- if (attr->type == PERF_TYPE_HARDWARE) ++- return event; +++ ee = bpf_event_entry_gen(perf_file, map_file); +++ if (ee) +++ return ee; ++ ++- if (attr->type == PERF_TYPE_SOFTWARE && ++- attr->config == PERF_COUNT_SW_BPF_OUTPUT) ++- return event; ++-err: ++- perf_event_release_kernel(event); ++- return ERR_PTR(-EINVAL); +++ ee = ERR_PTR(-ENOMEM); +++ return ee; ++ } ++ ++ static void perf_event_fd_array_put_ptr(void *ptr) ++ { ++- struct perf_event *event = ptr; +++ bpf_event_entry_free_rcu(ptr); +++} ++ ++- perf_event_release_kernel(event); +++static void perf_event_fd_array_release(struct bpf_map *map, +++ struct file *map_file) +++{ +++ struct bpf_array *array = container_of(map, struct bpf_array, map); +++ struct bpf_event_entry *ee; +++ int i; +++ +++ rcu_read_lock(); +++ for (i = 0; i < array->map.max_entries; i++) { +++ ee = READ_ONCE(array->ptrs[i]); +++ if (ee && ee->map_file == map_file) +++ fd_array_map_delete_elem(map, &i); +++ } +++ rcu_read_unlock(); ++ } ++ ++-static const struct bpf_map_ops perf_event_array_ops = { ++- .map_alloc = fd_array_map_alloc, ++- .map_free = perf_event_array_map_free, +++const struct bpf_map_ops perf_event_array_map_ops = { +++ .map_alloc_check = fd_array_map_alloc_check, +++ .map_alloc = array_map_alloc, +++ .map_free = fd_array_map_free, 
++ .map_get_next_key = array_map_get_next_key, ++ .map_lookup_elem = fd_array_map_lookup_elem, ++- .map_update_elem = fd_array_map_update_elem, ++ .map_delete_elem = fd_array_map_delete_elem, ++ .map_fd_get_ptr = perf_event_fd_array_get_ptr, ++ .map_fd_put_ptr = perf_event_fd_array_put_ptr, +++ .map_release = perf_event_fd_array_release, +++ .map_check_btf = map_check_no_btf, ++ }; ++ ++-static struct bpf_map_type_list perf_event_array_type __read_mostly = { ++- .ops = &perf_event_array_ops, ++- .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, ++-}; +++static struct bpf_map *array_of_map_alloc(union bpf_attr *attr) +++{ +++ struct bpf_map *map, *inner_map_meta; +++ +++ inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd); +++ if (IS_ERR(inner_map_meta)) +++ return inner_map_meta; +++ +++ map = array_map_alloc(attr); +++ if (IS_ERR(map)) { +++ bpf_map_meta_free(inner_map_meta); +++ return map; +++ } +++ +++ map->inner_map_meta = inner_map_meta; ++ ++-static int __init register_perf_event_array_map(void) +++ return map; +++} +++ +++static void array_of_map_free(struct bpf_map *map) ++ { ++- bpf_register_map_type(&perf_event_array_type); ++- return 0; +++ /* map->inner_map_meta is only accessed by syscall which +++ * is protected by fdget/fdput. +++ */ +++ bpf_map_meta_free(map->inner_map_meta); +++ bpf_fd_array_map_clear(map); +++ fd_array_map_free(map); ++ } ++-late_initcall(register_perf_event_array_map); +++ +++static void *array_of_map_lookup_elem(struct bpf_map *map, void *key) +++{ +++ struct bpf_map **inner_map = array_map_lookup_elem(map, key); +++ +++ if (!inner_map) +++ return NULL; +++ +++ return READ_ONCE(*inner_map); +++} +++ +++static u32 array_of_map_gen_lookup(struct bpf_map *map, +++ struct bpf_insn *insn_buf) +++{ +++ struct bpf_array *array = container_of(map, struct bpf_array, map); +++ u32 elem_size = round_up(map->value_size, 8); +++ struct bpf_insn *insn = insn_buf; +++ const int ret = BPF_REG_0; +++ const int map_ptr = BPF_REG_1; +++ const int index = BPF_REG_2; +++ +++ *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); +++ *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); +++ if (map->unpriv_array) { +++ *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6); +++ *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask); +++ } else { +++ *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5); +++ } +++ if (is_power_of_2(elem_size)) +++ *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size)); +++ else +++ *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size); +++ *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr); +++ *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0); +++ *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1); +++ *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); +++ *insn++ = BPF_MOV64_IMM(ret, 0); +++ +++ return insn - insn_buf; +++} +++ +++const struct bpf_map_ops array_of_maps_map_ops = { +++ .map_alloc_check = fd_array_map_alloc_check, +++ .map_alloc = array_of_map_alloc, +++ .map_free = array_of_map_free, +++ .map_get_next_key = array_map_get_next_key, +++ .map_lookup_elem = array_of_map_lookup_elem, +++ .map_delete_elem = fd_array_map_delete_elem, +++ .map_fd_get_ptr = bpf_map_fd_get_ptr, +++ .map_fd_put_ptr = bpf_map_fd_put_ptr, +++ .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, +++ .map_gen_lookup = array_of_map_gen_lookup, +++ .map_check_btf = map_check_no_btf, +++}; ++--- /dev/null +++++ b/kernel/bpf/bpf_lru_list.c ++@@ -0,0 +1,695 @@ +++// SPDX-License-Identifier: GPL-2.0-only +++/* Copyright (c) 2016 Facebook +++ */ +++#include +++#include +++#include 
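/*
 * A minimal user-space sketch (editor-added, not part of the backported
 * sources) of how the per-CPU array paths earlier in this hunk are
 * exercised through the bpf(2) syscall: bpf_percpu_array_update_elem()
 * consumes one round_up(value_size, 8) slot per possible CPU from the
 * user buffer, and lookup fills the same layout back in, which is why
 * the kernel code above copies with bpf_long_memcpy() over 8-byte slots.
 * The sys_bpf() wrapper, the fixed 256-slot buffer and the sysconf()
 * CPU-count approximation below are illustrative assumptions; libbpf
 * derives the possible-CPU count from /sys/devices/system/cpu/possible
 * instead.  Needs a kernel with BPF_MAP_TYPE_PERCPU_ARRAY and sufficient
 * privileges (CAP_SYS_ADMIN, or CAP_BPF on newer kernels).
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

int main(void)
{
	long ncpus = sysconf(_SC_NPROCESSORS_CONF);	/* rough possible-CPU count */
	uint64_t vals[256];				/* one 8-byte slot per CPU */
	union bpf_attr attr;
	uint32_t key = 0;
	int map_fd, i;

	if (ncpus < 1 || ncpus > 256)
		return 1;

	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_PERCPU_ARRAY;
	attr.key_size = sizeof(uint32_t);
	attr.value_size = sizeof(uint64_t);	/* round_up(8, 8) == 8 bytes per CPU */
	attr.max_entries = 1;
	map_fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
	if (map_fd < 0)
		return 1;

	for (i = 0; i < ncpus; i++)
		vals[i] = 42 + i;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key = (uintptr_t)&key;
	attr.value = (uintptr_t)vals;	/* kernel walks for_each_possible_cpu() */
	if (sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) ||
	    sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)))
		return 1;

	for (i = 0; i < ncpus; i++)
		printf("cpu%d: %llu\n", i, (unsigned long long)vals[i]);

	close(map_fd);
	return 0;
}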
+++ +++#include "bpf_lru_list.h" +++ +++#define LOCAL_FREE_TARGET (128) +++#define LOCAL_NR_SCANS LOCAL_FREE_TARGET +++ +++#define PERCPU_FREE_TARGET (4) +++#define PERCPU_NR_SCANS PERCPU_FREE_TARGET +++ +++/* Helpers to get the local list index */ +++#define LOCAL_LIST_IDX(t) ((t) - BPF_LOCAL_LIST_T_OFFSET) +++#define LOCAL_FREE_LIST_IDX LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_FREE) +++#define LOCAL_PENDING_LIST_IDX LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_PENDING) +++#define IS_LOCAL_LIST_TYPE(t) ((t) >= BPF_LOCAL_LIST_T_OFFSET) +++ +++static int get_next_cpu(int cpu) +++{ +++ cpu = cpumask_next(cpu, cpu_possible_mask); +++ if (cpu >= nr_cpu_ids) +++ cpu = cpumask_first(cpu_possible_mask); +++ return cpu; +++} +++ +++/* Local list helpers */ +++static struct list_head *local_free_list(struct bpf_lru_locallist *loc_l) +++{ +++ return &loc_l->lists[LOCAL_FREE_LIST_IDX]; +++} +++ +++static struct list_head *local_pending_list(struct bpf_lru_locallist *loc_l) +++{ +++ return &loc_l->lists[LOCAL_PENDING_LIST_IDX]; +++} +++ +++/* bpf_lru_node helpers */ +++static bool bpf_lru_node_is_ref(const struct bpf_lru_node *node) +++{ +++ return node->ref; +++} +++ +++static void bpf_lru_list_count_inc(struct bpf_lru_list *l, +++ enum bpf_lru_list_type type) +++{ +++ if (type < NR_BPF_LRU_LIST_COUNT) +++ l->counts[type]++; +++} +++ +++static void bpf_lru_list_count_dec(struct bpf_lru_list *l, +++ enum bpf_lru_list_type type) +++{ +++ if (type < NR_BPF_LRU_LIST_COUNT) +++ l->counts[type]--; +++} +++ +++static void __bpf_lru_node_move_to_free(struct bpf_lru_list *l, +++ struct bpf_lru_node *node, +++ struct list_head *free_list, +++ enum bpf_lru_list_type tgt_free_type) +++{ +++ if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type))) +++ return; +++ +++ /* If the removing node is the next_inactive_rotation candidate, +++ * move the next_inactive_rotation pointer also. +++ */ +++ if (&node->list == l->next_inactive_rotation) +++ l->next_inactive_rotation = l->next_inactive_rotation->prev; +++ +++ bpf_lru_list_count_dec(l, node->type); +++ +++ node->type = tgt_free_type; +++ list_move(&node->list, free_list); +++} +++ +++/* Move nodes from local list to the LRU list */ +++static void __bpf_lru_node_move_in(struct bpf_lru_list *l, +++ struct bpf_lru_node *node, +++ enum bpf_lru_list_type tgt_type) +++{ +++ if (WARN_ON_ONCE(!IS_LOCAL_LIST_TYPE(node->type)) || +++ WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type))) +++ return; +++ +++ bpf_lru_list_count_inc(l, tgt_type); +++ node->type = tgt_type; +++ node->ref = 0; +++ list_move(&node->list, &l->lists[tgt_type]); +++} +++ +++/* Move nodes between or within active and inactive list (like +++ * active to inactive, inactive to active or tail of active back to +++ * the head of active). +++ */ +++static void __bpf_lru_node_move(struct bpf_lru_list *l, +++ struct bpf_lru_node *node, +++ enum bpf_lru_list_type tgt_type) +++{ +++ if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type)) || +++ WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type))) +++ return; +++ +++ if (node->type != tgt_type) { +++ bpf_lru_list_count_dec(l, node->type); +++ bpf_lru_list_count_inc(l, tgt_type); +++ node->type = tgt_type; +++ } +++ node->ref = 0; +++ +++ /* If the moving node is the next_inactive_rotation candidate, +++ * move the next_inactive_rotation pointer also. 
+++ */ +++ if (&node->list == l->next_inactive_rotation) +++ l->next_inactive_rotation = l->next_inactive_rotation->prev; +++ +++ list_move(&node->list, &l->lists[tgt_type]); +++} +++ +++static bool bpf_lru_list_inactive_low(const struct bpf_lru_list *l) +++{ +++ return l->counts[BPF_LRU_LIST_T_INACTIVE] < +++ l->counts[BPF_LRU_LIST_T_ACTIVE]; +++} +++ +++/* Rotate the active list: +++ * 1. Start from tail +++ * 2. If the node has the ref bit set, it will be rotated +++ * back to the head of active list with the ref bit cleared. +++ * Give this node one more chance to survive in the active list. +++ * 3. If the ref bit is not set, move it to the head of the +++ * inactive list. +++ * 4. It will at most scan nr_scans nodes +++ */ +++static void __bpf_lru_list_rotate_active(struct bpf_lru *lru, +++ struct bpf_lru_list *l) +++{ +++ struct list_head *active = &l->lists[BPF_LRU_LIST_T_ACTIVE]; +++ struct bpf_lru_node *node, *tmp_node, *first_node; +++ unsigned int i = 0; +++ +++ first_node = list_first_entry(active, struct bpf_lru_node, list); +++ list_for_each_entry_safe_reverse(node, tmp_node, active, list) { +++ if (bpf_lru_node_is_ref(node)) +++ __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE); +++ else +++ __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE); +++ +++ if (++i == lru->nr_scans || node == first_node) +++ break; +++ } +++} +++ +++/* Rotate the inactive list. It starts from the next_inactive_rotation +++ * 1. If the node has ref bit set, it will be moved to the head +++ * of active list with the ref bit cleared. +++ * 2. If the node does not have ref bit set, it will leave it +++ * at its current location (i.e. do nothing) so that it can +++ * be considered during the next inactive_shrink. +++ * 3. It will at most scan nr_scans nodes +++ */ +++static void __bpf_lru_list_rotate_inactive(struct bpf_lru *lru, +++ struct bpf_lru_list *l) +++{ +++ struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE]; +++ struct list_head *cur, *last, *next = inactive; +++ struct bpf_lru_node *node; +++ unsigned int i = 0; +++ +++ if (list_empty(inactive)) +++ return; +++ +++ last = l->next_inactive_rotation->next; +++ if (last == inactive) +++ last = last->next; +++ +++ cur = l->next_inactive_rotation; +++ while (i < lru->nr_scans) { +++ if (cur == inactive) { +++ cur = cur->prev; +++ continue; +++ } +++ +++ node = list_entry(cur, struct bpf_lru_node, list); +++ next = cur->prev; +++ if (bpf_lru_node_is_ref(node)) +++ __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE); +++ if (cur == last) +++ break; +++ cur = next; +++ i++; +++ } +++ +++ l->next_inactive_rotation = next; +++} +++ +++/* Shrink the inactive list. It starts from the tail of the +++ * inactive list and only move the nodes without the ref bit +++ * set to the designated free list. 
+++ */ +++static unsigned int +++__bpf_lru_list_shrink_inactive(struct bpf_lru *lru, +++ struct bpf_lru_list *l, +++ unsigned int tgt_nshrink, +++ struct list_head *free_list, +++ enum bpf_lru_list_type tgt_free_type) +++{ +++ struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE]; +++ struct bpf_lru_node *node, *tmp_node; +++ unsigned int nshrinked = 0; +++ unsigned int i = 0; +++ +++ list_for_each_entry_safe_reverse(node, tmp_node, inactive, list) { +++ if (bpf_lru_node_is_ref(node)) { +++ __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE); +++ } else if (lru->del_from_htab(lru->del_arg, node)) { +++ __bpf_lru_node_move_to_free(l, node, free_list, +++ tgt_free_type); +++ if (++nshrinked == tgt_nshrink) +++ break; +++ } +++ +++ if (++i == lru->nr_scans) +++ break; +++ } +++ +++ return nshrinked; +++} +++ +++/* 1. Rotate the active list (if needed) +++ * 2. Always rotate the inactive list +++ */ +++static void __bpf_lru_list_rotate(struct bpf_lru *lru, struct bpf_lru_list *l) +++{ +++ if (bpf_lru_list_inactive_low(l)) +++ __bpf_lru_list_rotate_active(lru, l); +++ +++ __bpf_lru_list_rotate_inactive(lru, l); +++} +++ +++/* Calls __bpf_lru_list_shrink_inactive() to shrink some +++ * ref-bit-cleared nodes and move them to the designated +++ * free list. +++ * +++ * If it cannot get a free node after calling +++ * __bpf_lru_list_shrink_inactive(). It will just remove +++ * one node from either inactive or active list without +++ * honoring the ref-bit. It prefers inactive list to active +++ * list in this situation. +++ */ +++static unsigned int __bpf_lru_list_shrink(struct bpf_lru *lru, +++ struct bpf_lru_list *l, +++ unsigned int tgt_nshrink, +++ struct list_head *free_list, +++ enum bpf_lru_list_type tgt_free_type) +++ +++{ +++ struct bpf_lru_node *node, *tmp_node; +++ struct list_head *force_shrink_list; +++ unsigned int nshrinked; +++ +++ nshrinked = __bpf_lru_list_shrink_inactive(lru, l, tgt_nshrink, +++ free_list, tgt_free_type); +++ if (nshrinked) +++ return nshrinked; +++ +++ /* Do a force shrink by ignoring the reference bit */ +++ if (!list_empty(&l->lists[BPF_LRU_LIST_T_INACTIVE])) +++ force_shrink_list = &l->lists[BPF_LRU_LIST_T_INACTIVE]; +++ else +++ force_shrink_list = &l->lists[BPF_LRU_LIST_T_ACTIVE]; +++ +++ list_for_each_entry_safe_reverse(node, tmp_node, force_shrink_list, +++ list) { +++ if (lru->del_from_htab(lru->del_arg, node)) { +++ __bpf_lru_node_move_to_free(l, node, free_list, +++ tgt_free_type); +++ return 1; +++ } +++ } +++ +++ return 0; +++} +++ +++/* Flush the nodes from the local pending list to the LRU list */ +++static void __local_list_flush(struct bpf_lru_list *l, +++ struct bpf_lru_locallist *loc_l) +++{ +++ struct bpf_lru_node *node, *tmp_node; +++ +++ list_for_each_entry_safe_reverse(node, tmp_node, +++ local_pending_list(loc_l), list) { +++ if (bpf_lru_node_is_ref(node)) +++ __bpf_lru_node_move_in(l, node, BPF_LRU_LIST_T_ACTIVE); +++ else +++ __bpf_lru_node_move_in(l, node, +++ BPF_LRU_LIST_T_INACTIVE); +++ } +++} +++ +++static void bpf_lru_list_push_free(struct bpf_lru_list *l, +++ struct bpf_lru_node *node) +++{ +++ unsigned long flags; +++ +++ if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type))) +++ return; +++ +++ raw_spin_lock_irqsave(&l->lock, flags); +++ __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE); +++ raw_spin_unlock_irqrestore(&l->lock, flags); +++} +++ +++static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru, +++ struct bpf_lru_locallist *loc_l) +++{ +++ struct bpf_lru_list *l = &lru->common_lru.lru_list; +++ struct 
bpf_lru_node *node, *tmp_node; +++ unsigned int nfree = 0; +++ +++ raw_spin_lock(&l->lock); +++ +++ __local_list_flush(l, loc_l); +++ +++ __bpf_lru_list_rotate(lru, l); +++ +++ list_for_each_entry_safe(node, tmp_node, &l->lists[BPF_LRU_LIST_T_FREE], +++ list) { +++ __bpf_lru_node_move_to_free(l, node, local_free_list(loc_l), +++ BPF_LRU_LOCAL_LIST_T_FREE); +++ if (++nfree == LOCAL_FREE_TARGET) +++ break; +++ } +++ +++ if (nfree < LOCAL_FREE_TARGET) +++ __bpf_lru_list_shrink(lru, l, LOCAL_FREE_TARGET - nfree, +++ local_free_list(loc_l), +++ BPF_LRU_LOCAL_LIST_T_FREE); +++ +++ raw_spin_unlock(&l->lock); +++} +++ +++static void __local_list_add_pending(struct bpf_lru *lru, +++ struct bpf_lru_locallist *loc_l, +++ int cpu, +++ struct bpf_lru_node *node, +++ u32 hash) +++{ +++ *(u32 *)((void *)node + lru->hash_offset) = hash; +++ node->cpu = cpu; +++ node->type = BPF_LRU_LOCAL_LIST_T_PENDING; +++ node->ref = 0; +++ list_add(&node->list, local_pending_list(loc_l)); +++} +++ +++static struct bpf_lru_node * +++__local_list_pop_free(struct bpf_lru_locallist *loc_l) +++{ +++ struct bpf_lru_node *node; +++ +++ node = list_first_entry_or_null(local_free_list(loc_l), +++ struct bpf_lru_node, +++ list); +++ if (node) +++ list_del(&node->list); +++ +++ return node; +++} +++ +++static struct bpf_lru_node * +++__local_list_pop_pending(struct bpf_lru *lru, struct bpf_lru_locallist *loc_l) +++{ +++ struct bpf_lru_node *node; +++ bool force = false; +++ +++ignore_ref: +++ /* Get from the tail (i.e. older element) of the pending list. */ +++ list_for_each_entry_reverse(node, local_pending_list(loc_l), +++ list) { +++ if ((!bpf_lru_node_is_ref(node) || force) && +++ lru->del_from_htab(lru->del_arg, node)) { +++ list_del(&node->list); +++ return node; +++ } +++ } +++ +++ if (!force) { +++ force = true; +++ goto ignore_ref; +++ } +++ +++ return NULL; +++} +++ +++static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru, +++ u32 hash) +++{ +++ struct list_head *free_list; +++ struct bpf_lru_node *node = NULL; +++ struct bpf_lru_list *l; +++ unsigned long flags; +++ int cpu = raw_smp_processor_id(); +++ +++ l = per_cpu_ptr(lru->percpu_lru, cpu); +++ +++ raw_spin_lock_irqsave(&l->lock, flags); +++ +++ __bpf_lru_list_rotate(lru, l); +++ +++ free_list = &l->lists[BPF_LRU_LIST_T_FREE]; +++ if (list_empty(free_list)) +++ __bpf_lru_list_shrink(lru, l, PERCPU_FREE_TARGET, free_list, +++ BPF_LRU_LIST_T_FREE); +++ +++ if (!list_empty(free_list)) { +++ node = list_first_entry(free_list, struct bpf_lru_node, list); +++ *(u32 *)((void *)node + lru->hash_offset) = hash; +++ node->ref = 0; +++ __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE); +++ } +++ +++ raw_spin_unlock_irqrestore(&l->lock, flags); +++ +++ return node; +++} +++ +++static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru, +++ u32 hash) +++{ +++ struct bpf_lru_locallist *loc_l, *steal_loc_l; +++ struct bpf_common_lru *clru = &lru->common_lru; +++ struct bpf_lru_node *node; +++ int steal, first_steal; +++ unsigned long flags; +++ int cpu = raw_smp_processor_id(); +++ +++ loc_l = per_cpu_ptr(clru->local_list, cpu); +++ +++ raw_spin_lock_irqsave(&loc_l->lock, flags); +++ +++ node = __local_list_pop_free(loc_l); +++ if (!node) { +++ bpf_lru_list_pop_free_to_local(lru, loc_l); +++ node = __local_list_pop_free(loc_l); +++ } +++ +++ if (node) +++ __local_list_add_pending(lru, loc_l, cpu, node, hash); +++ +++ raw_spin_unlock_irqrestore(&loc_l->lock, flags); +++ +++ if (node) +++ return node; +++ +++ /* No free nodes found from the local 
free list and +++ * the global LRU list. +++ * +++ * Steal from the local free/pending list of the +++ * current CPU and remote CPU in RR. It starts +++ * with the loc_l->next_steal CPU. +++ */ +++ +++ first_steal = loc_l->next_steal; +++ steal = first_steal; +++ do { +++ steal_loc_l = per_cpu_ptr(clru->local_list, steal); +++ +++ raw_spin_lock_irqsave(&steal_loc_l->lock, flags); +++ +++ node = __local_list_pop_free(steal_loc_l); +++ if (!node) +++ node = __local_list_pop_pending(lru, steal_loc_l); +++ +++ raw_spin_unlock_irqrestore(&steal_loc_l->lock, flags); +++ +++ steal = get_next_cpu(steal); +++ } while (!node && steal != first_steal); +++ +++ loc_l->next_steal = steal; +++ +++ if (node) { +++ raw_spin_lock_irqsave(&loc_l->lock, flags); +++ __local_list_add_pending(lru, loc_l, cpu, node, hash); +++ raw_spin_unlock_irqrestore(&loc_l->lock, flags); +++ } +++ +++ return node; +++} +++ +++struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru, u32 hash) +++{ +++ if (lru->percpu) +++ return bpf_percpu_lru_pop_free(lru, hash); +++ else +++ return bpf_common_lru_pop_free(lru, hash); +++} +++ +++static void bpf_common_lru_push_free(struct bpf_lru *lru, +++ struct bpf_lru_node *node) +++{ +++ u8 node_type = READ_ONCE(node->type); +++ unsigned long flags; +++ +++ if (WARN_ON_ONCE(node_type == BPF_LRU_LIST_T_FREE) || +++ WARN_ON_ONCE(node_type == BPF_LRU_LOCAL_LIST_T_FREE)) +++ return; +++ +++ if (node_type == BPF_LRU_LOCAL_LIST_T_PENDING) { +++ struct bpf_lru_locallist *loc_l; +++ +++ loc_l = per_cpu_ptr(lru->common_lru.local_list, node->cpu); +++ +++ raw_spin_lock_irqsave(&loc_l->lock, flags); +++ +++ if (unlikely(node->type != BPF_LRU_LOCAL_LIST_T_PENDING)) { +++ raw_spin_unlock_irqrestore(&loc_l->lock, flags); +++ goto check_lru_list; +++ } +++ +++ node->type = BPF_LRU_LOCAL_LIST_T_FREE; +++ node->ref = 0; +++ list_move(&node->list, local_free_list(loc_l)); +++ +++ raw_spin_unlock_irqrestore(&loc_l->lock, flags); +++ return; +++ } +++ +++check_lru_list: +++ bpf_lru_list_push_free(&lru->common_lru.lru_list, node); +++} +++ +++static void bpf_percpu_lru_push_free(struct bpf_lru *lru, +++ struct bpf_lru_node *node) +++{ +++ struct bpf_lru_list *l; +++ unsigned long flags; +++ +++ l = per_cpu_ptr(lru->percpu_lru, node->cpu); +++ +++ raw_spin_lock_irqsave(&l->lock, flags); +++ +++ __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE); +++ +++ raw_spin_unlock_irqrestore(&l->lock, flags); +++} +++ +++void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node) +++{ +++ if (lru->percpu) +++ bpf_percpu_lru_push_free(lru, node); +++ else +++ bpf_common_lru_push_free(lru, node); +++} +++ +++static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf, +++ u32 node_offset, u32 elem_size, +++ u32 nr_elems) +++{ +++ struct bpf_lru_list *l = &lru->common_lru.lru_list; +++ u32 i; +++ +++ for (i = 0; i < nr_elems; i++) { +++ struct bpf_lru_node *node; +++ +++ node = (struct bpf_lru_node *)(buf + node_offset); +++ node->type = BPF_LRU_LIST_T_FREE; +++ node->ref = 0; +++ list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]); +++ buf += elem_size; +++ } +++} +++ +++static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf, +++ u32 node_offset, u32 elem_size, +++ u32 nr_elems) +++{ +++ u32 i, pcpu_entries; +++ int cpu; +++ struct bpf_lru_list *l; +++ +++ pcpu_entries = nr_elems / num_possible_cpus(); +++ +++ i = 0; +++ +++ for_each_possible_cpu(cpu) { +++ struct bpf_lru_node *node; +++ +++ l = per_cpu_ptr(lru->percpu_lru, cpu); +++again: +++ node = (struct bpf_lru_node *)(buf + 
node_offset); +++ node->cpu = cpu; +++ node->type = BPF_LRU_LIST_T_FREE; +++ node->ref = 0; +++ list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]); +++ i++; +++ buf += elem_size; +++ if (i == nr_elems) +++ break; +++ if (i % pcpu_entries) +++ goto again; +++ } +++} +++ +++void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset, +++ u32 elem_size, u32 nr_elems) +++{ +++ if (lru->percpu) +++ bpf_percpu_lru_populate(lru, buf, node_offset, elem_size, +++ nr_elems); +++ else +++ bpf_common_lru_populate(lru, buf, node_offset, elem_size, +++ nr_elems); +++} +++ +++static void bpf_lru_locallist_init(struct bpf_lru_locallist *loc_l, int cpu) +++{ +++ int i; +++ +++ for (i = 0; i < NR_BPF_LRU_LOCAL_LIST_T; i++) +++ INIT_LIST_HEAD(&loc_l->lists[i]); +++ +++ loc_l->next_steal = cpu; +++ +++ raw_spin_lock_init(&loc_l->lock); +++} +++ +++static void bpf_lru_list_init(struct bpf_lru_list *l) +++{ +++ int i; +++ +++ for (i = 0; i < NR_BPF_LRU_LIST_T; i++) +++ INIT_LIST_HEAD(&l->lists[i]); +++ +++ for (i = 0; i < NR_BPF_LRU_LIST_COUNT; i++) +++ l->counts[i] = 0; +++ +++ l->next_inactive_rotation = &l->lists[BPF_LRU_LIST_T_INACTIVE]; +++ +++ raw_spin_lock_init(&l->lock); +++} +++ +++int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset, +++ del_from_htab_func del_from_htab, void *del_arg) +++{ +++ int cpu; +++ +++ if (percpu) { +++ lru->percpu_lru = alloc_percpu(struct bpf_lru_list); +++ if (!lru->percpu_lru) +++ return -ENOMEM; +++ +++ for_each_possible_cpu(cpu) { +++ struct bpf_lru_list *l; +++ +++ l = per_cpu_ptr(lru->percpu_lru, cpu); +++ bpf_lru_list_init(l); +++ } +++ lru->nr_scans = PERCPU_NR_SCANS; +++ } else { +++ struct bpf_common_lru *clru = &lru->common_lru; +++ +++ clru->local_list = alloc_percpu(struct bpf_lru_locallist); +++ if (!clru->local_list) +++ return -ENOMEM; +++ +++ for_each_possible_cpu(cpu) { +++ struct bpf_lru_locallist *loc_l; +++ +++ loc_l = per_cpu_ptr(clru->local_list, cpu); +++ bpf_lru_locallist_init(loc_l, cpu); +++ } +++ +++ bpf_lru_list_init(&clru->lru_list); +++ lru->nr_scans = LOCAL_NR_SCANS; +++ } +++ +++ lru->percpu = percpu; +++ lru->del_from_htab = del_from_htab; +++ lru->del_arg = del_arg; +++ lru->hash_offset = hash_offset; +++ +++ return 0; +++} +++ +++void bpf_lru_destroy(struct bpf_lru *lru) +++{ +++ if (lru->percpu) +++ free_percpu(lru->percpu_lru); +++ else +++ free_percpu(lru->common_lru.local_list); +++} ++--- /dev/null +++++ b/kernel/bpf/bpf_lru_list.h ++@@ -0,0 +1,82 @@ +++/* SPDX-License-Identifier: GPL-2.0-only */ +++/* Copyright (c) 2016 Facebook +++ */ +++#ifndef __BPF_LRU_LIST_H_ +++#define __BPF_LRU_LIST_H_ +++ +++#include +++#include +++ +++#define NR_BPF_LRU_LIST_T (3) +++#define NR_BPF_LRU_LIST_COUNT (2) +++#define NR_BPF_LRU_LOCAL_LIST_T (2) +++#define BPF_LOCAL_LIST_T_OFFSET NR_BPF_LRU_LIST_T +++ +++enum bpf_lru_list_type { +++ BPF_LRU_LIST_T_ACTIVE, +++ BPF_LRU_LIST_T_INACTIVE, +++ BPF_LRU_LIST_T_FREE, +++ BPF_LRU_LOCAL_LIST_T_FREE, +++ BPF_LRU_LOCAL_LIST_T_PENDING, +++}; +++ +++struct bpf_lru_node { +++ struct list_head list; +++ u16 cpu; +++ u8 type; +++ u8 ref; +++}; +++ +++struct bpf_lru_list { +++ struct list_head lists[NR_BPF_LRU_LIST_T]; +++ unsigned int counts[NR_BPF_LRU_LIST_COUNT]; +++ /* The next inacitve list rotation starts from here */ +++ struct list_head *next_inactive_rotation; +++ +++ raw_spinlock_t lock ____cacheline_aligned_in_smp; +++}; +++ +++struct bpf_lru_locallist { +++ struct list_head lists[NR_BPF_LRU_LOCAL_LIST_T]; +++ u16 next_steal; +++ raw_spinlock_t lock; +++}; +++ +++struct 
bpf_common_lru { +++ struct bpf_lru_list lru_list; +++ struct bpf_lru_locallist __percpu *local_list; +++}; +++ +++typedef bool (*del_from_htab_func)(void *arg, struct bpf_lru_node *node); +++ +++struct bpf_lru { +++ union { +++ struct bpf_common_lru common_lru; +++ struct bpf_lru_list __percpu *percpu_lru; +++ }; +++ del_from_htab_func del_from_htab; +++ void *del_arg; +++ unsigned int hash_offset; +++ unsigned int nr_scans; +++ bool percpu; +++}; +++ +++static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node) +++{ +++ /* ref is an approximation on access frequency. It does not +++ * have to be very accurate. Hence, no protection is used. +++ */ +++ if (!node->ref) +++ node->ref = 1; +++} +++ +++int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset, +++ del_from_htab_func del_from_htab, void *delete_arg); +++void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset, +++ u32 elem_size, u32 nr_elems); +++void bpf_lru_destroy(struct bpf_lru *lru); +++struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru, u32 hash); +++void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node); +++void bpf_lru_promote(struct bpf_lru *lru, struct bpf_lru_node *node); +++ +++#endif ++--- /dev/null +++++ b/kernel/bpf/btf.c ++@@ -0,0 +1,3514 @@ +++/* SPDX-License-Identifier: GPL-2.0 */ +++/* Copyright (c) 2018 Facebook */ +++ +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++ +++/* BTF (BPF Type Format) is the meta data format which describes +++ * the data types of BPF program/map. Hence, it basically focus +++ * on the C programming language which the modern BPF is primary +++ * using. +++ * +++ * ELF Section: +++ * ~~~~~~~~~~~ +++ * The BTF data is stored under the ".BTF" ELF section +++ * +++ * struct btf_type: +++ * ~~~~~~~~~~~~~~~ +++ * Each 'struct btf_type' object describes a C data type. +++ * Depending on the type it is describing, a 'struct btf_type' +++ * object may be followed by more data. F.e. +++ * To describe an array, 'struct btf_type' is followed by +++ * 'struct btf_array'. +++ * +++ * 'struct btf_type' and any extra data following it are +++ * 4 bytes aligned. +++ * +++ * Type section: +++ * ~~~~~~~~~~~~~ +++ * The BTF type section contains a list of 'struct btf_type' objects. +++ * Each one describes a C type. Recall from the above section +++ * that a 'struct btf_type' object could be immediately followed by extra +++ * data in order to desribe some particular C types. +++ * +++ * type_id: +++ * ~~~~~~~ +++ * Each btf_type object is identified by a type_id. The type_id +++ * is implicitly implied by the location of the btf_type object in +++ * the BTF type section. The first one has type_id 1. The second +++ * one has type_id 2...etc. Hence, an earlier btf_type has +++ * a smaller type_id. +++ * +++ * A btf_type object may refer to another btf_type object by using +++ * type_id (i.e. the "type" in the "struct btf_type"). +++ * +++ * NOTE that we cannot assume any reference-order. +++ * A btf_type object can refer to an earlier btf_type object +++ * but it can also refer to a later btf_type object. +++ * +++ * For example, to describe "const void *". A btf_type +++ * object describing "const" may refer to another btf_type +++ * object describing "void *". 
This type-reference is done +++ * by specifying type_id: +++ * +++ * [1] CONST (anon) type_id=2 +++ * [2] PTR (anon) type_id=0 +++ * +++ * The above is the btf_verifier debug log: +++ * - Each line started with "[?]" is a btf_type object +++ * - [?] is the type_id of the btf_type object. +++ * - CONST/PTR is the BTF_KIND_XXX +++ * - "(anon)" is the name of the type. It just +++ * happens that CONST and PTR has no name. +++ * - type_id=XXX is the 'u32 type' in btf_type +++ * +++ * NOTE: "void" has type_id 0 +++ * +++ * String section: +++ * ~~~~~~~~~~~~~~ +++ * The BTF string section contains the names used by the type section. +++ * Each string is referred by an "offset" from the beginning of the +++ * string section. +++ * +++ * Each string is '\0' terminated. +++ * +++ * The first character in the string section must be '\0' +++ * which is used to mean 'anonymous'. Some btf_type may not +++ * have a name. +++ */ +++ +++/* BTF verification: +++ * +++ * To verify BTF data, two passes are needed. +++ * +++ * Pass #1 +++ * ~~~~~~~ +++ * The first pass is to collect all btf_type objects to +++ * an array: "btf->types". +++ * +++ * Depending on the C type that a btf_type is describing, +++ * a btf_type may be followed by extra data. We don't know +++ * how many btf_type is there, and more importantly we don't +++ * know where each btf_type is located in the type section. +++ * +++ * Without knowing the location of each type_id, most verifications +++ * cannot be done. e.g. an earlier btf_type may refer to a later +++ * btf_type (recall the "const void *" above), so we cannot +++ * check this type-reference in the first pass. +++ * +++ * In the first pass, it still does some verifications (e.g. +++ * checking the name is a valid offset to the string section). +++ * +++ * Pass #2 +++ * ~~~~~~~ +++ * The main focus is to resolve a btf_type that is referring +++ * to another type. +++ * +++ * We have to ensure the referring type: +++ * 1) does exist in the BTF (i.e. in btf->types[]) +++ * 2) does not cause a loop: +++ * struct A { +++ * struct B b; +++ * }; +++ * +++ * struct B { +++ * struct A a; +++ * }; +++ * +++ * btf_type_needs_resolve() decides if a btf_type needs +++ * to be resolved. +++ * +++ * The needs_resolve type implements the "resolve()" ops which +++ * essentially does a DFS and detects backedge. +++ * +++ * During resolve (or DFS), different C types have different +++ * "RESOLVED" conditions. +++ * +++ * When resolving a BTF_KIND_STRUCT, we need to resolve all its +++ * members because a member is always referring to another +++ * type. A struct's member can be treated as "RESOLVED" if +++ * it is referring to a BTF_KIND_PTR. Otherwise, the +++ * following valid C struct would be rejected: +++ * +++ * struct A { +++ * int m; +++ * struct A *a; +++ * }; +++ * +++ * When resolving a BTF_KIND_PTR, it needs to keep resolving if +++ * it is referring to another BTF_KIND_PTR. 
Otherwise, we cannot +++ * detect a pointer loop, e.g.: +++ * BTF_KIND_CONST -> BTF_KIND_PTR -> BTF_KIND_CONST -> BTF_KIND_PTR + +++ * ^ | +++ * +-----------------------------------------+ +++ * +++ */ +++ +++#define BITS_PER_U128 (sizeof(u64) * BITS_PER_BYTE * 2) +++#define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1) +++#define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK) +++#define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3) +++#define BITS_ROUNDUP_BYTES(bits) \ +++ (BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits)) +++ +++#define BTF_INFO_MASK 0x8f00ffff +++#define BTF_INT_MASK 0x0fffffff +++#define BTF_TYPE_ID_VALID(type_id) ((type_id) <= BTF_MAX_TYPE) +++#define BTF_STR_OFFSET_VALID(name_off) ((name_off) <= BTF_MAX_NAME_OFFSET) +++ +++/* 16MB for 64k structs and each has 16 members and +++ * a few MB spaces for the string section. +++ * The hard limit is S32_MAX. +++ */ +++#define BTF_MAX_SIZE (16 * 1024 * 1024) +++ +++#define for_each_member(i, struct_type, member) \ +++ for (i = 0, member = btf_type_member(struct_type); \ +++ i < btf_type_vlen(struct_type); \ +++ i++, member++) +++ +++#define for_each_member_from(i, from, struct_type, member) \ +++ for (i = from, member = btf_type_member(struct_type) + from; \ +++ i < btf_type_vlen(struct_type); \ +++ i++, member++) +++ +++#define for_each_vsi(i, struct_type, member) \ +++ for (i = 0, member = btf_type_var_secinfo(struct_type); \ +++ i < btf_type_vlen(struct_type); \ +++ i++, member++) +++ +++#define for_each_vsi_from(i, from, struct_type, member) \ +++ for (i = from, member = btf_type_var_secinfo(struct_type) + from; \ +++ i < btf_type_vlen(struct_type); \ +++ i++, member++) +++ +++DEFINE_IDR(btf_idr); +++DEFINE_SPINLOCK(btf_idr_lock); +++ +++struct btf { +++ void *data; +++ struct btf_type **types; +++ u32 *resolved_ids; +++ u32 *resolved_sizes; +++ const char *strings; +++ void *nohdr_data; +++ struct btf_header hdr; +++ u32 nr_types; +++ u32 types_size; +++ u32 data_size; +++ refcount_t refcnt; +++ u32 id; +++ struct rcu_head rcu; +++}; +++ +++enum verifier_phase { +++ CHECK_META, +++ CHECK_TYPE, +++}; +++ +++struct resolve_vertex { +++ const struct btf_type *t; +++ u32 type_id; +++ u16 next_member; +++}; +++ +++enum visit_state { +++ NOT_VISITED, +++ VISITED, +++ RESOLVED, +++}; +++ +++enum resolve_mode { +++ RESOLVE_TBD, /* To Be Determined */ +++ RESOLVE_PTR, /* Resolving for Pointer */ +++ RESOLVE_STRUCT_OR_ARRAY, /* Resolving for struct/union +++ * or array +++ */ +++}; +++ +++#define MAX_RESOLVE_DEPTH 32 +++ +++struct btf_sec_info { +++ u32 off; +++ u32 len; +++}; +++ +++struct btf_verifier_env { +++ struct btf *btf; +++ u8 *visit_states; +++ struct resolve_vertex stack[MAX_RESOLVE_DEPTH]; +++ struct bpf_verifier_log log; +++ u32 log_type_id; +++ u32 top_stack; +++ enum verifier_phase phase; +++ enum resolve_mode resolve_mode; +++}; +++ +++static const char * const btf_kind_str[NR_BTF_KINDS] = { +++ [BTF_KIND_UNKN] = "UNKNOWN", +++ [BTF_KIND_INT] = "INT", +++ [BTF_KIND_PTR] = "PTR", +++ [BTF_KIND_ARRAY] = "ARRAY", +++ [BTF_KIND_STRUCT] = "STRUCT", +++ [BTF_KIND_UNION] = "UNION", +++ [BTF_KIND_ENUM] = "ENUM", +++ [BTF_KIND_FWD] = "FWD", +++ [BTF_KIND_TYPEDEF] = "TYPEDEF", +++ [BTF_KIND_VOLATILE] = "VOLATILE", +++ [BTF_KIND_CONST] = "CONST", +++ [BTF_KIND_RESTRICT] = "RESTRICT", +++ [BTF_KIND_FUNC] = "FUNC", +++ [BTF_KIND_FUNC_PROTO] = "FUNC_PROTO", +++ [BTF_KIND_VAR] = "VAR", +++ [BTF_KIND_DATASEC] = "DATASEC", +++}; +++ +++struct btf_kind_operations { +++ s32 (*check_meta)(struct btf_verifier_env *env, +++ 
const struct btf_type *t, +++ u32 meta_left); +++ int (*resolve)(struct btf_verifier_env *env, +++ const struct resolve_vertex *v); +++ int (*check_member)(struct btf_verifier_env *env, +++ const struct btf_type *struct_type, +++ const struct btf_member *member, +++ const struct btf_type *member_type); +++ int (*check_kflag_member)(struct btf_verifier_env *env, +++ const struct btf_type *struct_type, +++ const struct btf_member *member, +++ const struct btf_type *member_type); +++ void (*log_details)(struct btf_verifier_env *env, +++ const struct btf_type *t); +++ void (*seq_show)(const struct btf *btf, const struct btf_type *t, +++ u32 type_id, void *data, u8 bits_offsets, +++ struct seq_file *m); +++}; +++ +++static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS]; +++static struct btf_type btf_void; +++ +++static int btf_resolve(struct btf_verifier_env *env, +++ const struct btf_type *t, u32 type_id); +++ +++static bool btf_type_is_modifier(const struct btf_type *t) +++{ +++ /* Some of them is not strictly a C modifier +++ * but they are grouped into the same bucket +++ * for BTF concern: +++ * A type (t) that refers to another +++ * type through t->type AND its size cannot +++ * be determined without following the t->type. +++ * +++ * ptr does not fall into this bucket +++ * because its size is always sizeof(void *). +++ */ +++ switch (BTF_INFO_KIND(t->info)) { +++ case BTF_KIND_TYPEDEF: +++ case BTF_KIND_VOLATILE: +++ case BTF_KIND_CONST: +++ case BTF_KIND_RESTRICT: +++ return true; +++ } +++ +++ return false; +++} +++ +++bool btf_type_is_void(const struct btf_type *t) +++{ +++ return t == &btf_void; +++} +++ +++static bool btf_type_is_fwd(const struct btf_type *t) +++{ +++ return BTF_INFO_KIND(t->info) == BTF_KIND_FWD; +++} +++ +++static bool btf_type_is_func(const struct btf_type *t) +++{ +++ return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC; +++} +++ +++static bool btf_type_is_func_proto(const struct btf_type *t) +++{ +++ return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO; +++} +++ +++static bool btf_type_nosize(const struct btf_type *t) +++{ +++ return btf_type_is_void(t) || btf_type_is_fwd(t) || +++ btf_type_is_func(t) || btf_type_is_func_proto(t); +++} +++ +++static bool btf_type_nosize_or_null(const struct btf_type *t) +++{ +++ return !t || btf_type_nosize(t); +++} +++ +++/* union is only a special case of struct: +++ * all its offsetof(member) == 0 +++ */ +++static bool btf_type_is_struct(const struct btf_type *t) +++{ +++ u8 kind = BTF_INFO_KIND(t->info); +++ +++ return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; +++} +++ +++static bool __btf_type_is_struct(const struct btf_type *t) +++{ +++ return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT; +++} +++ +++static bool btf_type_is_array(const struct btf_type *t) +++{ +++ return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY; +++} +++ +++static bool btf_type_is_ptr(const struct btf_type *t) +++{ +++ return BTF_INFO_KIND(t->info) == BTF_KIND_PTR; +++} +++ +++static bool btf_type_is_int(const struct btf_type *t) +++{ +++ return BTF_INFO_KIND(t->info) == BTF_KIND_INT; +++} +++ +++static bool btf_type_is_var(const struct btf_type *t) +++{ +++ return BTF_INFO_KIND(t->info) == BTF_KIND_VAR; +++} +++ +++static bool btf_type_is_datasec(const struct btf_type *t) +++{ +++ return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; +++} +++ +++/* Types that act only as a source, not sink or intermediate +++ * type when resolving. 
+++ */ +++static bool btf_type_is_resolve_source_only(const struct btf_type *t) +++{ +++ return btf_type_is_var(t) || +++ btf_type_is_datasec(t); +++} +++ +++/* What types need to be resolved? +++ * +++ * btf_type_is_modifier() is an obvious one. +++ * +++ * btf_type_is_struct() because its member refers to +++ * another type (through member->type). +++ * +++ * btf_type_is_var() because the variable refers to +++ * another type. btf_type_is_datasec() holds multiple +++ * btf_type_is_var() types that need resolving. +++ * +++ * btf_type_is_array() because its element (array->type) +++ * refers to another type. Array can be thought of a +++ * special case of struct while array just has the same +++ * member-type repeated by array->nelems of times. +++ */ +++static bool btf_type_needs_resolve(const struct btf_type *t) +++{ +++ return btf_type_is_modifier(t) || +++ btf_type_is_ptr(t) || +++ btf_type_is_struct(t) || +++ btf_type_is_array(t) || +++ btf_type_is_var(t) || +++ btf_type_is_datasec(t); +++} +++ +++/* t->size can be used */ +++static bool btf_type_has_size(const struct btf_type *t) +++{ +++ switch (BTF_INFO_KIND(t->info)) { +++ case BTF_KIND_INT: +++ case BTF_KIND_STRUCT: +++ case BTF_KIND_UNION: +++ case BTF_KIND_ENUM: +++ case BTF_KIND_DATASEC: +++ return true; +++ } +++ +++ return false; +++} +++ +++static const char *btf_int_encoding_str(u8 encoding) +++{ +++ if (encoding == 0) +++ return "(none)"; +++ else if (encoding == BTF_INT_SIGNED) +++ return "SIGNED"; +++ else if (encoding == BTF_INT_CHAR) +++ return "CHAR"; +++ else if (encoding == BTF_INT_BOOL) +++ return "BOOL"; +++ else +++ return "UNKN"; +++} +++ +++static u16 btf_type_vlen(const struct btf_type *t) +++{ +++ return BTF_INFO_VLEN(t->info); +++} +++ +++static bool btf_type_kflag(const struct btf_type *t) +++{ +++ return BTF_INFO_KFLAG(t->info); +++} +++ +++static u32 btf_member_bit_offset(const struct btf_type *struct_type, +++ const struct btf_member *member) +++{ +++ return btf_type_kflag(struct_type) ? BTF_MEMBER_BIT_OFFSET(member->offset) +++ : member->offset; +++} +++ +++static u32 btf_member_bitfield_size(const struct btf_type *struct_type, +++ const struct btf_member *member) +++{ +++ return btf_type_kflag(struct_type) ? BTF_MEMBER_BITFIELD_SIZE(member->offset) +++ : 0; +++} +++ +++static u32 btf_type_int(const struct btf_type *t) +++{ +++ return *(u32 *)(t + 1); +++} +++ +++static const struct btf_array *btf_type_array(const struct btf_type *t) +++{ +++ return (const struct btf_array *)(t + 1); +++} +++ +++static const struct btf_member *btf_type_member(const struct btf_type *t) +++{ +++ return (const struct btf_member *)(t + 1); +++} +++ +++static const struct btf_enum *btf_type_enum(const struct btf_type *t) +++{ +++ return (const struct btf_enum *)(t + 1); +++} +++ +++static const struct btf_var *btf_type_var(const struct btf_type *t) +++{ +++ return (const struct btf_var *)(t + 1); +++} +++ +++static const struct btf_var_secinfo *btf_type_var_secinfo(const struct btf_type *t) +++{ +++ return (const struct btf_var_secinfo *)(t + 1); +++} +++ +++static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) +++{ +++ return kind_ops[BTF_INFO_KIND(t->info)]; +++} +++ +++static bool btf_name_offset_valid(const struct btf *btf, u32 offset) +++{ +++ return BTF_STR_OFFSET_VALID(offset) && +++ offset < btf->hdr.str_len; +++} +++ +++static bool __btf_name_char_ok(char c, bool first, bool dot_ok) +++{ +++ if ((first ? !isalpha(c) : +++ !isalnum(c)) && +++ c != '_' && +++ ((c == '.' 
&& !dot_ok) || +++ c != '.')) +++ return false; +++ return true; +++} +++ +++static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok) +++{ +++ /* offset must be valid */ +++ const char *src = &btf->strings[offset]; +++ const char *src_limit; +++ +++ if (!__btf_name_char_ok(*src, true, dot_ok)) +++ return false; +++ +++ /* set a limit on identifier length */ +++ src_limit = src + KSYM_NAME_LEN; +++ src++; +++ while (*src && src < src_limit) { +++ if (!__btf_name_char_ok(*src, false, dot_ok)) +++ return false; +++ src++; +++ } +++ +++ return !*src; +++} +++ +++/* Only C-style identifier is permitted. This can be relaxed if +++ * necessary. +++ */ +++static bool btf_name_valid_identifier(const struct btf *btf, u32 offset) +++{ +++ return __btf_name_valid(btf, offset, false); +++} +++ +++static bool btf_name_valid_section(const struct btf *btf, u32 offset) +++{ +++ return __btf_name_valid(btf, offset, true); +++} +++ +++static const char *__btf_name_by_offset(const struct btf *btf, u32 offset) +++{ +++ if (!offset) +++ return "(anon)"; +++ else if (offset < btf->hdr.str_len) +++ return &btf->strings[offset]; +++ else +++ return "(invalid-name-offset)"; +++} +++ +++const char *btf_name_by_offset(const struct btf *btf, u32 offset) +++{ +++ if (offset < btf->hdr.str_len) +++ return &btf->strings[offset]; +++ +++ return NULL; +++} +++ +++const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) +++{ +++ if (type_id > btf->nr_types) +++ return NULL; +++ +++ return btf->types[type_id]; +++} +++ +++/* +++ * Regular int is not a bit field and it must be either +++ * u8/u16/u32/u64 or __int128. +++ */ +++static bool btf_type_int_is_regular(const struct btf_type *t) +++{ +++ u8 nr_bits, nr_bytes; +++ u32 int_data; +++ +++ int_data = btf_type_int(t); +++ nr_bits = BTF_INT_BITS(int_data); +++ nr_bytes = BITS_ROUNDUP_BYTES(nr_bits); +++ if (BITS_PER_BYTE_MASKED(nr_bits) || +++ BTF_INT_OFFSET(int_data) || +++ (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) && +++ nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64) && +++ nr_bytes != (2 * sizeof(u64)))) { +++ return false; +++ } +++ +++ return true; +++} +++ +++/* +++ * Check that given struct member is a regular int with expected +++ * offset and size. +++ */ +++bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, +++ const struct btf_member *m, +++ u32 expected_offset, u32 expected_size) +++{ +++ const struct btf_type *t; +++ u32 id, int_data; +++ u8 nr_bits; +++ +++ id = m->type; +++ t = btf_type_id_size(btf, &id, NULL); +++ if (!t || !btf_type_is_int(t)) +++ return false; +++ +++ int_data = btf_type_int(t); +++ nr_bits = BTF_INT_BITS(int_data); +++ if (btf_type_kflag(s)) { +++ u32 bitfield_size = BTF_MEMBER_BITFIELD_SIZE(m->offset); +++ u32 bit_offset = BTF_MEMBER_BIT_OFFSET(m->offset); +++ +++ /* if kflag set, int should be a regular int and +++ * bit offset should be at byte boundary. +++ */ +++ return !bitfield_size && +++ BITS_ROUNDUP_BYTES(bit_offset) == expected_offset && +++ BITS_ROUNDUP_BYTES(nr_bits) == expected_size; +++ } +++ +++ if (BTF_INT_OFFSET(int_data) || +++ BITS_PER_BYTE_MASKED(m->offset) || +++ BITS_ROUNDUP_BYTES(m->offset) != expected_offset || +++ BITS_PER_BYTE_MASKED(nr_bits) || +++ BITS_ROUNDUP_BYTES(nr_bits) != expected_size) +++ return false; +++ +++ return true; +++} +++ +++__printf(2, 3) static void __btf_verifier_log(struct bpf_verifier_log *log, +++ const char *fmt, ...) 
+++{ +++ va_list args; +++ +++ va_start(args, fmt); +++ bpf_verifier_vlog(log, fmt, args); +++ va_end(args); +++} +++ +++__printf(2, 3) static void btf_verifier_log(struct btf_verifier_env *env, +++ const char *fmt, ...) +++{ +++ struct bpf_verifier_log *log = &env->log; +++ va_list args; +++ +++ if (!bpf_verifier_log_needed(log)) +++ return; +++ +++ va_start(args, fmt); +++ bpf_verifier_vlog(log, fmt, args); +++ va_end(args); +++} +++ +++__printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env, +++ const struct btf_type *t, +++ bool log_details, +++ const char *fmt, ...) +++{ +++ struct bpf_verifier_log *log = &env->log; +++ u8 kind = BTF_INFO_KIND(t->info); +++ struct btf *btf = env->btf; +++ va_list args; +++ +++ if (!bpf_verifier_log_needed(log)) +++ return; +++ +++ __btf_verifier_log(log, "[%u] %s %s%s", +++ env->log_type_id, +++ btf_kind_str[kind], +++ __btf_name_by_offset(btf, t->name_off), +++ log_details ? " " : ""); +++ +++ if (log_details) +++ btf_type_ops(t)->log_details(env, t); +++ +++ if (fmt && *fmt) { +++ __btf_verifier_log(log, " "); +++ va_start(args, fmt); +++ bpf_verifier_vlog(log, fmt, args); +++ va_end(args); +++ } +++ +++ __btf_verifier_log(log, "\n"); +++} +++ +++#define btf_verifier_log_type(env, t, ...) \ +++ __btf_verifier_log_type((env), (t), true, __VA_ARGS__) +++#define btf_verifier_log_basic(env, t, ...) \ +++ __btf_verifier_log_type((env), (t), false, __VA_ARGS__) +++ +++__printf(4, 5) +++static void btf_verifier_log_member(struct btf_verifier_env *env, +++ const struct btf_type *struct_type, +++ const struct btf_member *member, +++ const char *fmt, ...) +++{ +++ struct bpf_verifier_log *log = &env->log; +++ struct btf *btf = env->btf; +++ va_list args; +++ +++ if (!bpf_verifier_log_needed(log)) +++ return; +++ +++ /* The CHECK_META phase already did a btf dump. +++ * +++ * If member is logged again, it must hit an error in +++ * parsing this member. It is useful to print out which +++ * struct this member belongs to. +++ */ +++ if (env->phase != CHECK_META) +++ btf_verifier_log_type(env, struct_type, NULL); +++ +++ if (btf_type_kflag(struct_type)) +++ __btf_verifier_log(log, +++ "\t%s type_id=%u bitfield_size=%u bits_offset=%u", +++ __btf_name_by_offset(btf, member->name_off), +++ member->type, +++ BTF_MEMBER_BITFIELD_SIZE(member->offset), +++ BTF_MEMBER_BIT_OFFSET(member->offset)); +++ else +++ __btf_verifier_log(log, "\t%s type_id=%u bits_offset=%u", +++ __btf_name_by_offset(btf, member->name_off), +++ member->type, member->offset); +++ +++ if (fmt && *fmt) { +++ __btf_verifier_log(log, " "); +++ va_start(args, fmt); +++ bpf_verifier_vlog(log, fmt, args); +++ va_end(args); +++ } +++ +++ __btf_verifier_log(log, "\n"); +++} +++ +++__printf(4, 5) +++static void btf_verifier_log_vsi(struct btf_verifier_env *env, +++ const struct btf_type *datasec_type, +++ const struct btf_var_secinfo *vsi, +++ const char *fmt, ...) 
+++{ +++ struct bpf_verifier_log *log = &env->log; +++ va_list args; +++ +++ if (!bpf_verifier_log_needed(log)) +++ return; +++ if (env->phase != CHECK_META) +++ btf_verifier_log_type(env, datasec_type, NULL); +++ +++ __btf_verifier_log(log, "\t type_id=%u offset=%u size=%u", +++ vsi->type, vsi->offset, vsi->size); +++ if (fmt && *fmt) { +++ __btf_verifier_log(log, " "); +++ va_start(args, fmt); +++ bpf_verifier_vlog(log, fmt, args); +++ va_end(args); +++ } +++ +++ __btf_verifier_log(log, "\n"); +++} +++ +++static void btf_verifier_log_hdr(struct btf_verifier_env *env, +++ u32 btf_data_size) +++{ +++ struct bpf_verifier_log *log = &env->log; +++ const struct btf *btf = env->btf; +++ const struct btf_header *hdr; +++ +++ if (!bpf_verifier_log_needed(log)) +++ return; +++ +++ hdr = &btf->hdr; +++ __btf_verifier_log(log, "magic: 0x%x\n", hdr->magic); +++ __btf_verifier_log(log, "version: %u\n", hdr->version); +++ __btf_verifier_log(log, "flags: 0x%x\n", hdr->flags); +++ __btf_verifier_log(log, "hdr_len: %u\n", hdr->hdr_len); +++ __btf_verifier_log(log, "type_off: %u\n", hdr->type_off); +++ __btf_verifier_log(log, "type_len: %u\n", hdr->type_len); +++ __btf_verifier_log(log, "str_off: %u\n", hdr->str_off); +++ __btf_verifier_log(log, "str_len: %u\n", hdr->str_len); +++ __btf_verifier_log(log, "btf_total_size: %u\n", btf_data_size); +++} +++ +++static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t) +++{ +++ struct btf *btf = env->btf; +++ +++ /* < 2 because +1 for btf_void which is always in btf->types[0]. +++ * btf_void is not accounted in btf->nr_types because btf_void +++ * does not come from the BTF file. +++ */ +++ if (btf->types_size - btf->nr_types < 2) { +++ /* Expand 'types' array */ +++ +++ struct btf_type **new_types; +++ u32 expand_by, new_size; +++ +++ if (btf->types_size == BTF_MAX_TYPE) { +++ btf_verifier_log(env, "Exceeded max num of types"); +++ return -E2BIG; +++ } +++ +++ expand_by = max_t(u32, btf->types_size >> 2, 16); +++ new_size = min_t(u32, BTF_MAX_TYPE, +++ btf->types_size + expand_by); +++ +++ new_types = kcalloc(new_size, sizeof(*new_types), +++ GFP_KERNEL | __GFP_NOWARN); +++ if (!new_types) +++ return -ENOMEM; +++ +++ if (btf->nr_types == 0) +++ new_types[0] = &btf_void; +++ else +++ memcpy(new_types, btf->types, +++ sizeof(*btf->types) * (btf->nr_types + 1)); +++ +++ kvfree(btf->types); +++ btf->types = new_types; +++ btf->types_size = new_size; +++ } +++ +++ btf->types[++(btf->nr_types)] = t; +++ +++ return 0; +++} +++ +++static int btf_alloc_id(struct btf *btf) +++{ +++ int id; +++ +++ idr_preload(GFP_KERNEL); +++ spin_lock_bh(&btf_idr_lock); +++ id = idr_alloc_cyclic(&btf_idr, btf, 1, INT_MAX, GFP_ATOMIC); +++ if (id > 0) +++ btf->id = id; +++ spin_unlock_bh(&btf_idr_lock); +++ idr_preload_end(); +++ +++ if (WARN_ON_ONCE(!id)) +++ return -ENOSPC; +++ +++ return id > 0 ? 0 : id; +++} +++ +++static void btf_free_id(struct btf *btf) +++{ +++ unsigned long flags; +++ +++ /* +++ * In map-in-map, calling map_delete_elem() on outer +++ * map will call bpf_map_put on the inner map. +++ * It will then eventually call btf_free_id() +++ * on the inner map. Some of the map_delete_elem() +++ * implementation may have irq disabled, so +++ * we need to use the _irqsave() version instead +++ * of the _bh() version. 
+++ */ +++ spin_lock_irqsave(&btf_idr_lock, flags); +++ idr_remove(&btf_idr, btf->id); +++ spin_unlock_irqrestore(&btf_idr_lock, flags); +++} +++ +++static void btf_free(struct btf *btf) +++{ +++ kvfree(btf->types); +++ kvfree(btf->resolved_sizes); +++ kvfree(btf->resolved_ids); +++ kvfree(btf->data); +++ kfree(btf); +++} +++ +++static void btf_free_rcu(struct rcu_head *rcu) +++{ +++ struct btf *btf = container_of(rcu, struct btf, rcu); +++ +++ btf_free(btf); +++} +++ +++void btf_put(struct btf *btf) +++{ +++ if (btf && refcount_dec_and_test(&btf->refcnt)) { +++ btf_free_id(btf); +++ call_rcu(&btf->rcu, btf_free_rcu); +++ } +++} +++ +++static int env_resolve_init(struct btf_verifier_env *env) +++{ +++ struct btf *btf = env->btf; +++ u32 nr_types = btf->nr_types; +++ u32 *resolved_sizes = NULL; +++ u32 *resolved_ids = NULL; +++ u8 *visit_states = NULL; +++ +++ /* +1 for btf_void */ +++ resolved_sizes = kcalloc(nr_types + 1, sizeof(*resolved_sizes), +++ GFP_KERNEL | __GFP_NOWARN); +++ if (!resolved_sizes) +++ goto nomem; +++ +++ resolved_ids = kcalloc(nr_types + 1, sizeof(*resolved_ids), +++ GFP_KERNEL | __GFP_NOWARN); +++ if (!resolved_ids) +++ goto nomem; +++ +++ visit_states = kcalloc(nr_types + 1, sizeof(*visit_states), +++ GFP_KERNEL | __GFP_NOWARN); +++ if (!visit_states) +++ goto nomem; +++ +++ btf->resolved_sizes = resolved_sizes; +++ btf->resolved_ids = resolved_ids; +++ env->visit_states = visit_states; +++ +++ return 0; +++ +++nomem: +++ kvfree(resolved_sizes); +++ kvfree(resolved_ids); +++ kvfree(visit_states); +++ return -ENOMEM; +++} +++ +++static void btf_verifier_env_free(struct btf_verifier_env *env) +++{ +++ kvfree(env->visit_states); +++ kfree(env); +++} +++ +++static bool env_type_is_resolve_sink(const struct btf_verifier_env *env, +++ const struct btf_type *next_type) +++{ +++ switch (env->resolve_mode) { +++ case RESOLVE_TBD: +++ /* int, enum or void is a sink */ +++ return !btf_type_needs_resolve(next_type); +++ case RESOLVE_PTR: +++ /* int, enum, void, struct, array, func or func_proto is a sink +++ * for ptr +++ */ +++ return !btf_type_is_modifier(next_type) && +++ !btf_type_is_ptr(next_type); +++ case RESOLVE_STRUCT_OR_ARRAY: +++ /* int, enum, void, ptr, func or func_proto is a sink +++ * for struct and array +++ */ +++ return !btf_type_is_modifier(next_type) && +++ !btf_type_is_array(next_type) && +++ !btf_type_is_struct(next_type); +++ default: +++ BUG(); +++ } +++} +++ +++static bool env_type_is_resolved(const struct btf_verifier_env *env, +++ u32 type_id) +++{ +++ return env->visit_states[type_id] == RESOLVED; +++} +++ +++static int env_stack_push(struct btf_verifier_env *env, +++ const struct btf_type *t, u32 type_id) +++{ +++ struct resolve_vertex *v; +++ +++ if (env->top_stack == MAX_RESOLVE_DEPTH) +++ return -E2BIG; +++ +++ if (env->visit_states[type_id] != NOT_VISITED) +++ return -EEXIST; +++ +++ env->visit_states[type_id] = VISITED; +++ +++ v = &env->stack[env->top_stack++]; +++ v->t = t; +++ v->type_id = type_id; +++ v->next_member = 0; +++ +++ if (env->resolve_mode == RESOLVE_TBD) { +++ if (btf_type_is_ptr(t)) +++ env->resolve_mode = RESOLVE_PTR; +++ else if (btf_type_is_struct(t) || btf_type_is_array(t)) +++ env->resolve_mode = RESOLVE_STRUCT_OR_ARRAY; +++ } +++ +++ return 0; +++} +++ +++static void env_stack_set_next_member(struct btf_verifier_env *env, +++ u16 next_member) +++{ +++ env->stack[env->top_stack - 1].next_member = next_member; +++} +++ +++static void env_stack_pop_resolved(struct btf_verifier_env *env, +++ u32 resolved_type_id, +++ u32 
resolved_size) +++{ +++ u32 type_id = env->stack[--(env->top_stack)].type_id; +++ struct btf *btf = env->btf; +++ +++ btf->resolved_sizes[type_id] = resolved_size; +++ btf->resolved_ids[type_id] = resolved_type_id; +++ env->visit_states[type_id] = RESOLVED; +++} +++ +++static const struct resolve_vertex *env_stack_peak(struct btf_verifier_env *env) +++{ +++ return env->top_stack ? &env->stack[env->top_stack - 1] : NULL; +++} +++ +++/* The input param "type_id" must point to a needs_resolve type */ +++static const struct btf_type *btf_type_id_resolve(const struct btf *btf, +++ u32 *type_id) +++{ +++ *type_id = btf->resolved_ids[*type_id]; +++ return btf_type_by_id(btf, *type_id); +++} +++ +++const struct btf_type *btf_type_id_size(const struct btf *btf, +++ u32 *type_id, u32 *ret_size) +++{ +++ const struct btf_type *size_type; +++ u32 size_type_id = *type_id; +++ u32 size = 0; +++ +++ size_type = btf_type_by_id(btf, size_type_id); +++ if (btf_type_nosize_or_null(size_type)) +++ return NULL; +++ +++ if (btf_type_has_size(size_type)) { +++ size = size_type->size; +++ } else if (btf_type_is_array(size_type)) { +++ size = btf->resolved_sizes[size_type_id]; +++ } else if (btf_type_is_ptr(size_type)) { +++ size = sizeof(void *); +++ } else { +++ if (WARN_ON_ONCE(!btf_type_is_modifier(size_type) && +++ !btf_type_is_var(size_type))) +++ return NULL; +++ +++ size_type_id = btf->resolved_ids[size_type_id]; +++ size_type = btf_type_by_id(btf, size_type_id); +++ if (btf_type_nosize_or_null(size_type)) +++ return NULL; +++ else if (btf_type_has_size(size_type)) +++ size = size_type->size; +++ else if (btf_type_is_array(size_type)) +++ size = btf->resolved_sizes[size_type_id]; +++ else if (btf_type_is_ptr(size_type)) +++ size = sizeof(void *); +++ else +++ return NULL; +++ } +++ +++ *type_id = size_type_id; +++ if (ret_size) +++ *ret_size = size; +++ +++ return size_type; +++} +++ +++static int btf_df_check_member(struct btf_verifier_env *env, +++ const struct btf_type *struct_type, +++ const struct btf_member *member, +++ const struct btf_type *member_type) +++{ +++ btf_verifier_log_basic(env, struct_type, +++ "Unsupported check_member"); +++ return -EINVAL; +++} +++ +++static int btf_df_check_kflag_member(struct btf_verifier_env *env, +++ const struct btf_type *struct_type, +++ const struct btf_member *member, +++ const struct btf_type *member_type) +++{ +++ btf_verifier_log_basic(env, struct_type, +++ "Unsupported check_kflag_member"); +++ return -EINVAL; +++} +++ +++/* Used for ptr, array and struct/union type members. +++ * int, enum and modifier types have their specific callback functions. +++ */ +++static int btf_generic_check_kflag_member(struct btf_verifier_env *env, +++ const struct btf_type *struct_type, +++ const struct btf_member *member, +++ const struct btf_type *member_type) +++{ +++ if (BTF_MEMBER_BITFIELD_SIZE(member->offset)) { +++ btf_verifier_log_member(env, struct_type, member, +++ "Invalid member bitfield_size"); +++ return -EINVAL; +++ } +++ +++ /* bitfield size is 0, so member->offset represents bit offset only. +++ * It is safe to call non kflag check_member variants. 
+++ */ +++ return btf_type_ops(member_type)->check_member(env, struct_type, +++ member, +++ member_type); +++} +++ +++static int btf_df_resolve(struct btf_verifier_env *env, +++ const struct resolve_vertex *v) +++{ +++ btf_verifier_log_basic(env, v->t, "Unsupported resolve"); +++ return -EINVAL; +++} +++ +++static void btf_df_seq_show(const struct btf *btf, const struct btf_type *t, +++ u32 type_id, void *data, u8 bits_offsets, +++ struct seq_file *m) +++{ +++ seq_printf(m, "<unsupported kind:%u>", BTF_INFO_KIND(t->info)); +++} +++ +++static int btf_int_check_member(struct btf_verifier_env *env, +++ const struct btf_type *struct_type, +++ const struct btf_member *member, +++ const struct btf_type *member_type) +++{ +++ u32 int_data = btf_type_int(member_type); +++ u32 struct_bits_off = member->offset; +++ u32 struct_size = struct_type->size; +++ u32 nr_copy_bits; +++ u32 bytes_offset; +++ +++ if (U32_MAX - struct_bits_off < BTF_INT_OFFSET(int_data)) { +++ btf_verifier_log_member(env, struct_type, member, +++ "bits_offset exceeds U32_MAX"); +++ return -EINVAL; +++ } +++ +++ struct_bits_off += BTF_INT_OFFSET(int_data); +++ bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); +++ nr_copy_bits = BTF_INT_BITS(int_data) + +++ BITS_PER_BYTE_MASKED(struct_bits_off); +++ +++ if (nr_copy_bits > BITS_PER_U128) { +++ btf_verifier_log_member(env, struct_type, member, +++ "nr_copy_bits exceeds 128"); +++ return -EINVAL; +++ } +++ +++ if (struct_size < bytes_offset || +++ struct_size - bytes_offset < BITS_ROUNDUP_BYTES(nr_copy_bits)) { +++ btf_verifier_log_member(env, struct_type, member, +++ "Member exceeds struct_size"); +++ return -EINVAL; +++ } +++ +++ return 0; +++} +++ +++static int btf_int_check_kflag_member(struct btf_verifier_env *env, +++ const struct btf_type *struct_type, +++ const struct btf_member *member, +++ const struct btf_type *member_type) +++{ +++ u32 struct_bits_off, nr_bits, nr_int_data_bits, bytes_offset; +++ u32 int_data = btf_type_int(member_type); +++ u32 struct_size = struct_type->size; +++ u32 nr_copy_bits; +++ +++ /* a regular int type is required for the kflag int member */ +++ if (!btf_type_int_is_regular(member_type)) { +++ btf_verifier_log_member(env, struct_type, member, +++ "Invalid member base type"); +++ return -EINVAL; +++ } +++ +++ /* check sanity of bitfield size */ +++ nr_bits = BTF_MEMBER_BITFIELD_SIZE(member->offset); +++ struct_bits_off = BTF_MEMBER_BIT_OFFSET(member->offset); +++ nr_int_data_bits = BTF_INT_BITS(int_data); +++ if (!nr_bits) { +++ /* Not a bitfield member, member offset must be at byte +++ * boundary.
+++ */ +++ if (BITS_PER_BYTE_MASKED(struct_bits_off)) { +++ btf_verifier_log_member(env, struct_type, member, +++ "Invalid member offset"); +++ return -EINVAL; +++ } +++ +++ nr_bits = nr_int_data_bits; +++ } else if (nr_bits > nr_int_data_bits) { +++ btf_verifier_log_member(env, struct_type, member, +++ "Invalid member bitfield_size"); +++ return -EINVAL; +++ } +++ +++ bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); +++ nr_copy_bits = nr_bits + BITS_PER_BYTE_MASKED(struct_bits_off); +++ if (nr_copy_bits > BITS_PER_U128) { +++ btf_verifier_log_member(env, struct_type, member, +++ "nr_copy_bits exceeds 128"); +++ return -EINVAL; +++ } +++ +++ if (struct_size < bytes_offset || +++ struct_size - bytes_offset < BITS_ROUNDUP_BYTES(nr_copy_bits)) { +++ btf_verifier_log_member(env, struct_type, member, +++ "Member exceeds struct_size"); +++ return -EINVAL; +++ } +++ +++ return 0; +++} +++ +++static s32 btf_int_check_meta(struct btf_verifier_env *env, +++ const struct btf_type *t, +++ u32 meta_left) +++{ +++ u32 int_data, nr_bits, meta_needed = sizeof(int_data); +++ u16 encoding; +++ +++ if (meta_left < meta_needed) { +++ btf_verifier_log_basic(env, t, +++ "meta_left:%u meta_needed:%u", +++ meta_left, meta_needed); +++ return -EINVAL; +++ } +++ +++ if (btf_type_vlen(t)) { +++ btf_verifier_log_type(env, t, "vlen != 0"); +++ return -EINVAL; +++ } +++ +++ if (btf_type_kflag(t)) { +++ btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); +++ return -EINVAL; +++ } +++ +++ int_data = btf_type_int(t); +++ if (int_data & ~BTF_INT_MASK) { +++ btf_verifier_log_basic(env, t, "Invalid int_data:%x", +++ int_data); +++ return -EINVAL; +++ } +++ +++ nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data); +++ +++ if (nr_bits > BITS_PER_U128) { +++ btf_verifier_log_type(env, t, "nr_bits exceeds %zu", +++ BITS_PER_U128); +++ return -EINVAL; +++ } +++ +++ if (BITS_ROUNDUP_BYTES(nr_bits) > t->size) { +++ btf_verifier_log_type(env, t, "nr_bits exceeds type_size"); +++ return -EINVAL; +++ } +++ +++ /* +++ * Only one of the encoding bits is allowed and it +++ * should be sufficient for the pretty print purpose (i.e. decoding). +++ * Multiple bits can be allowed later if it is found +++ * to be insufficient. +++ */ +++ encoding = BTF_INT_ENCODING(int_data); +++ if (encoding && +++ encoding != BTF_INT_SIGNED && +++ encoding != BTF_INT_CHAR && +++ encoding != BTF_INT_BOOL) { +++ btf_verifier_log_type(env, t, "Unsupported encoding"); +++ return -ENOTSUPP; +++ } +++ +++ btf_verifier_log_type(env, t, NULL); +++ +++ return meta_needed; +++} +++ +++static void btf_int_log(struct btf_verifier_env *env, +++ const struct btf_type *t) +++{ +++ int int_data = btf_type_int(t); +++ +++ btf_verifier_log(env, +++ "size=%u bits_offset=%u nr_bits=%u encoding=%s", +++ t->size, BTF_INT_OFFSET(int_data), +++ BTF_INT_BITS(int_data), +++ btf_int_encoding_str(BTF_INT_ENCODING(int_data))); +++} +++ +++static void btf_int128_print(struct seq_file *m, void *data) +++{ +++ /* data points to a __int128 number. 
+++ * Suppose +++ * int128_num = *(__int128 *)data; +++ * The below formulas shows what upper_num and lower_num represents: +++ * upper_num = int128_num >> 64; +++ * lower_num = int128_num & 0xffffffffFFFFFFFFULL; +++ */ +++ u64 upper_num, lower_num; +++ +++#ifdef __BIG_ENDIAN_BITFIELD +++ upper_num = *(u64 *)data; +++ lower_num = *(u64 *)(data + 8); +++#else +++ upper_num = *(u64 *)(data + 8); +++ lower_num = *(u64 *)data; +++#endif +++ if (upper_num == 0) +++ seq_printf(m, "0x%llx", lower_num); +++ else +++ seq_printf(m, "0x%llx%016llx", upper_num, lower_num); +++} +++ +++static void btf_int128_shift(u64 *print_num, u16 left_shift_bits, +++ u16 right_shift_bits) +++{ +++ u64 upper_num, lower_num; +++ +++#ifdef __BIG_ENDIAN_BITFIELD +++ upper_num = print_num[0]; +++ lower_num = print_num[1]; +++#else +++ upper_num = print_num[1]; +++ lower_num = print_num[0]; +++#endif +++ +++ /* shake out un-needed bits by shift/or operations */ +++ if (left_shift_bits >= 64) { +++ upper_num = lower_num << (left_shift_bits - 64); +++ lower_num = 0; +++ } else { +++ upper_num = (upper_num << left_shift_bits) | +++ (lower_num >> (64 - left_shift_bits)); +++ lower_num = lower_num << left_shift_bits; +++ } +++ +++ if (right_shift_bits >= 64) { +++ lower_num = upper_num >> (right_shift_bits - 64); +++ upper_num = 0; +++ } else { +++ lower_num = (lower_num >> right_shift_bits) | +++ (upper_num << (64 - right_shift_bits)); +++ upper_num = upper_num >> right_shift_bits; +++ } +++ +++#ifdef __BIG_ENDIAN_BITFIELD +++ print_num[0] = upper_num; +++ print_num[1] = lower_num; +++#else +++ print_num[0] = lower_num; +++ print_num[1] = upper_num; +++#endif +++} +++ +++static void btf_bitfield_seq_show(void *data, u8 bits_offset, +++ u8 nr_bits, struct seq_file *m) +++{ +++ u16 left_shift_bits, right_shift_bits; +++ u8 nr_copy_bytes; +++ u8 nr_copy_bits; +++ u64 print_num[2] = {}; +++ +++ nr_copy_bits = nr_bits + bits_offset; +++ nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits); +++ +++ memcpy(print_num, data, nr_copy_bytes); +++ +++#ifdef __BIG_ENDIAN_BITFIELD +++ left_shift_bits = bits_offset; +++#else +++ left_shift_bits = BITS_PER_U128 - nr_copy_bits; +++#endif +++ right_shift_bits = BITS_PER_U128 - nr_bits; +++ +++ btf_int128_shift(print_num, left_shift_bits, right_shift_bits); +++ btf_int128_print(m, print_num); +++} +++ +++ +++static void btf_int_bits_seq_show(const struct btf *btf, +++ const struct btf_type *t, +++ void *data, u8 bits_offset, +++ struct seq_file *m) +++{ +++ u32 int_data = btf_type_int(t); +++ u8 nr_bits = BTF_INT_BITS(int_data); +++ u8 total_bits_offset; +++ +++ /* +++ * bits_offset is at most 7. +++ * BTF_INT_OFFSET() cannot exceed 128 bits. 
+++ */ +++ total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data); +++ data += BITS_ROUNDDOWN_BYTES(total_bits_offset); +++ bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset); +++ btf_bitfield_seq_show(data, bits_offset, nr_bits, m); +++} +++ +++static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t, +++ u32 type_id, void *data, u8 bits_offset, +++ struct seq_file *m) +++{ +++ u32 int_data = btf_type_int(t); +++ u8 encoding = BTF_INT_ENCODING(int_data); +++ bool sign = encoding & BTF_INT_SIGNED; +++ u8 nr_bits = BTF_INT_BITS(int_data); +++ +++ if (bits_offset || BTF_INT_OFFSET(int_data) || +++ BITS_PER_BYTE_MASKED(nr_bits)) { +++ btf_int_bits_seq_show(btf, t, data, bits_offset, m); +++ return; +++ } +++ +++ switch (nr_bits) { +++ case 128: +++ btf_int128_print(m, data); +++ break; +++ case 64: +++ if (sign) +++ seq_printf(m, "%lld", *(s64 *)data); +++ else +++ seq_printf(m, "%llu", *(u64 *)data); +++ break; +++ case 32: +++ if (sign) +++ seq_printf(m, "%d", *(s32 *)data); +++ else +++ seq_printf(m, "%u", *(u32 *)data); +++ break; +++ case 16: +++ if (sign) +++ seq_printf(m, "%d", *(s16 *)data); +++ else +++ seq_printf(m, "%u", *(u16 *)data); +++ break; +++ case 8: +++ if (sign) +++ seq_printf(m, "%d", *(s8 *)data); +++ else +++ seq_printf(m, "%u", *(u8 *)data); +++ break; +++ default: +++ btf_int_bits_seq_show(btf, t, data, bits_offset, m); +++ } +++} +++ +++static const struct btf_kind_operations int_ops = { +++ .check_meta = btf_int_check_meta, +++ .resolve = btf_df_resolve, +++ .check_member = btf_int_check_member, +++ .check_kflag_member = btf_int_check_kflag_member, +++ .log_details = btf_int_log, +++ .seq_show = btf_int_seq_show, +++}; +++ +++static int btf_modifier_check_member(struct btf_verifier_env *env, +++ const struct btf_type *struct_type, +++ const struct btf_member *member, +++ const struct btf_type *member_type) +++{ +++ const struct btf_type *resolved_type; +++ u32 resolved_type_id = member->type; +++ struct btf_member resolved_member; +++ struct btf *btf = env->btf; +++ +++ resolved_type = btf_type_id_size(btf, &resolved_type_id, NULL); +++ if (!resolved_type) { +++ btf_verifier_log_member(env, struct_type, member, +++ "Invalid member"); +++ return -EINVAL; +++ } +++ +++ resolved_member = *member; +++ resolved_member.type = resolved_type_id; +++ +++ return btf_type_ops(resolved_type)->check_member(env, struct_type, +++ &resolved_member, +++ resolved_type); +++} +++ +++static int btf_modifier_check_kflag_member(struct btf_verifier_env *env, +++ const struct btf_type *struct_type, +++ const struct btf_member *member, +++ const struct btf_type *member_type) +++{ +++ const struct btf_type *resolved_type; +++ u32 resolved_type_id = member->type; +++ struct btf_member resolved_member; +++ struct btf *btf = env->btf; +++ +++ resolved_type = btf_type_id_size(btf, &resolved_type_id, NULL); +++ if (!resolved_type) { +++ btf_verifier_log_member(env, struct_type, member, +++ "Invalid member"); +++ return -EINVAL; +++ } +++ +++ resolved_member = *member; +++ resolved_member.type = resolved_type_id; +++ +++ return btf_type_ops(resolved_type)->check_kflag_member(env, struct_type, +++ &resolved_member, +++ resolved_type); +++} +++ +++static int btf_ptr_check_member(struct btf_verifier_env *env, +++ const struct btf_type *struct_type, +++ const struct btf_member *member, +++ const struct btf_type *member_type) +++{ +++ u32 struct_size, struct_bits_off, bytes_offset; +++ +++ struct_size = struct_type->size; +++ struct_bits_off = member->offset; +++ 
bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); +++ +++ if (BITS_PER_BYTE_MASKED(struct_bits_off)) { +++ btf_verifier_log_member(env, struct_type, member, +++ "Member is not byte aligned"); +++ return -EINVAL; +++ } +++ +++ if (struct_size - bytes_offset < sizeof(void *)) { +++ btf_verifier_log_member(env, struct_type, member, +++ "Member exceeds struct_size"); +++ return -EINVAL; +++ } +++ +++ return 0; +++} +++ +++static int btf_ref_type_check_meta(struct btf_verifier_env *env, +++ const struct btf_type *t, +++ u32 meta_left) +++{ +++ if (btf_type_vlen(t)) { +++ btf_verifier_log_type(env, t, "vlen != 0"); +++ return -EINVAL; +++ } +++ +++ if (btf_type_kflag(t)) { +++ btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); +++ return -EINVAL; +++ } +++ +++ if (!BTF_TYPE_ID_VALID(t->type)) { +++ btf_verifier_log_type(env, t, "Invalid type_id"); +++ return -EINVAL; +++ } +++ +++ /* typedef type must have a valid name, and other ref types, +++ * volatile, const, restrict, should have a null name. +++ */ +++ if (BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF) { +++ if (!t->name_off || +++ !btf_name_valid_identifier(env->btf, t->name_off)) { +++ btf_verifier_log_type(env, t, "Invalid name"); +++ return -EINVAL; +++ } +++ } else { +++ if (t->name_off) { +++ btf_verifier_log_type(env, t, "Invalid name"); +++ return -EINVAL; +++ } +++ } +++ +++ btf_verifier_log_type(env, t, NULL); +++ +++ return 0; +++} +++ +++static int btf_modifier_resolve(struct btf_verifier_env *env, +++ const struct resolve_vertex *v) +++{ +++ const struct btf_type *t = v->t; +++ const struct btf_type *next_type; +++ u32 next_type_id = t->type; +++ struct btf *btf = env->btf; +++ +++ next_type = btf_type_by_id(btf, next_type_id); +++ if (!next_type || btf_type_is_resolve_source_only(next_type)) { +++ btf_verifier_log_type(env, v->t, "Invalid type_id"); +++ return -EINVAL; +++ } +++ +++ if (!env_type_is_resolve_sink(env, next_type) && +++ !env_type_is_resolved(env, next_type_id)) +++ return env_stack_push(env, next_type, next_type_id); +++ +++ /* Figure out the resolved next_type_id with size. +++ * They will be stored in the current modifier's +++ * resolved_ids and resolved_sizes such that it can +++ * save us a few type-following when we use it later (e.g. in +++ * pretty print). 
+++ */ +++ if (!btf_type_id_size(btf, &next_type_id, NULL)) { +++ if (env_type_is_resolved(env, next_type_id)) +++ next_type = btf_type_id_resolve(btf, &next_type_id); +++ +++ /* "typedef void new_void", "const void"...etc */ +++ if (!btf_type_is_void(next_type) && +++ !btf_type_is_fwd(next_type) && +++ !btf_type_is_func_proto(next_type)) { +++ btf_verifier_log_type(env, v->t, "Invalid type_id"); +++ return -EINVAL; +++ } +++ } +++ +++ env_stack_pop_resolved(env, next_type_id, 0); +++ +++ return 0; +++} +++ +++static int btf_var_resolve(struct btf_verifier_env *env, +++ const struct resolve_vertex *v) +++{ +++ const struct btf_type *next_type; +++ const struct btf_type *t = v->t; +++ u32 next_type_id = t->type; +++ struct btf *btf = env->btf; +++ +++ next_type = btf_type_by_id(btf, next_type_id); +++ if (!next_type || btf_type_is_resolve_source_only(next_type)) { +++ btf_verifier_log_type(env, v->t, "Invalid type_id"); +++ return -EINVAL; +++ } +++ +++ if (!env_type_is_resolve_sink(env, next_type) && +++ !env_type_is_resolved(env, next_type_id)) +++ return env_stack_push(env, next_type, next_type_id); +++ +++ if (btf_type_is_modifier(next_type)) { +++ const struct btf_type *resolved_type; +++ u32 resolved_type_id; +++ +++ resolved_type_id = next_type_id; +++ resolved_type = btf_type_id_resolve(btf, &resolved_type_id); +++ +++ if (btf_type_is_ptr(resolved_type) && +++ !env_type_is_resolve_sink(env, resolved_type) && +++ !env_type_is_resolved(env, resolved_type_id)) +++ return env_stack_push(env, resolved_type, +++ resolved_type_id); +++ } +++ +++ /* We must resolve to something concrete at this point, no +++ * forward types or similar that would resolve to size of +++ * zero is allowed. +++ */ +++ if (!btf_type_id_size(btf, &next_type_id, NULL)) { +++ btf_verifier_log_type(env, v->t, "Invalid type_id"); +++ return -EINVAL; +++ } +++ +++ env_stack_pop_resolved(env, next_type_id, 0); +++ +++ return 0; +++} +++ +++static int btf_ptr_resolve(struct btf_verifier_env *env, +++ const struct resolve_vertex *v) +++{ +++ const struct btf_type *next_type; +++ const struct btf_type *t = v->t; +++ u32 next_type_id = t->type; +++ struct btf *btf = env->btf; +++ +++ next_type = btf_type_by_id(btf, next_type_id); +++ if (!next_type || btf_type_is_resolve_source_only(next_type)) { +++ btf_verifier_log_type(env, v->t, "Invalid type_id"); +++ return -EINVAL; +++ } +++ +++ if (!env_type_is_resolve_sink(env, next_type) && +++ !env_type_is_resolved(env, next_type_id)) +++ return env_stack_push(env, next_type, next_type_id); +++ +++ /* If the modifier was RESOLVED during RESOLVE_STRUCT_OR_ARRAY, +++ * the modifier may have stopped resolving when it was resolved +++ * to a ptr (last-resolved-ptr). +++ * +++ * We now need to continue from the last-resolved-ptr to +++ * ensure the last-resolved-ptr will not referring back to +++ * the currenct ptr (t). 
+++ */ +++ if (btf_type_is_modifier(next_type)) { +++ const struct btf_type *resolved_type; +++ u32 resolved_type_id; +++ +++ resolved_type_id = next_type_id; +++ resolved_type = btf_type_id_resolve(btf, &resolved_type_id); +++ +++ if (btf_type_is_ptr(resolved_type) && +++ !env_type_is_resolve_sink(env, resolved_type) && +++ !env_type_is_resolved(env, resolved_type_id)) +++ return env_stack_push(env, resolved_type, +++ resolved_type_id); +++ } +++ +++ if (!btf_type_id_size(btf, &next_type_id, NULL)) { +++ if (env_type_is_resolved(env, next_type_id)) +++ next_type = btf_type_id_resolve(btf, &next_type_id); +++ +++ if (!btf_type_is_void(next_type) && +++ !btf_type_is_fwd(next_type) && +++ !btf_type_is_func_proto(next_type)) { +++ btf_verifier_log_type(env, v->t, "Invalid type_id"); +++ return -EINVAL; +++ } +++ } +++ +++ env_stack_pop_resolved(env, next_type_id, 0); +++ +++ return 0; +++} +++ +++static void btf_modifier_seq_show(const struct btf *btf, +++ const struct btf_type *t, +++ u32 type_id, void *data, +++ u8 bits_offset, struct seq_file *m) +++{ +++ t = btf_type_id_resolve(btf, &type_id); +++ +++ btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m); +++} +++ +++static void btf_var_seq_show(const struct btf *btf, const struct btf_type *t, +++ u32 type_id, void *data, u8 bits_offset, +++ struct seq_file *m) +++{ +++ t = btf_type_id_resolve(btf, &type_id); +++ +++ btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m); +++} +++ +++static void btf_ptr_seq_show(const struct btf *btf, const struct btf_type *t, +++ u32 type_id, void *data, u8 bits_offset, +++ struct seq_file *m) +++{ +++ /* It is a hashed value */ +++ seq_printf(m, "%p", *(void **)data); +++} +++ +++static void btf_ref_type_log(struct btf_verifier_env *env, +++ const struct btf_type *t) +++{ +++ btf_verifier_log(env, "type_id=%u", t->type); +++} +++ +++static struct btf_kind_operations modifier_ops = { +++ .check_meta = btf_ref_type_check_meta, +++ .resolve = btf_modifier_resolve, +++ .check_member = btf_modifier_check_member, +++ .check_kflag_member = btf_modifier_check_kflag_member, +++ .log_details = btf_ref_type_log, +++ .seq_show = btf_modifier_seq_show, +++}; +++ +++static struct btf_kind_operations ptr_ops = { +++ .check_meta = btf_ref_type_check_meta, +++ .resolve = btf_ptr_resolve, +++ .check_member = btf_ptr_check_member, +++ .check_kflag_member = btf_generic_check_kflag_member, +++ .log_details = btf_ref_type_log, +++ .seq_show = btf_ptr_seq_show, +++}; +++ +++static s32 btf_fwd_check_meta(struct btf_verifier_env *env, +++ const struct btf_type *t, +++ u32 meta_left) +++{ +++ if (btf_type_vlen(t)) { +++ btf_verifier_log_type(env, t, "vlen != 0"); +++ return -EINVAL; +++ } +++ +++ if (t->type) { +++ btf_verifier_log_type(env, t, "type != 0"); +++ return -EINVAL; +++ } +++ +++ /* fwd type must have a valid name */ +++ if (!t->name_off || +++ !btf_name_valid_identifier(env->btf, t->name_off)) { +++ btf_verifier_log_type(env, t, "Invalid name"); +++ return -EINVAL; +++ } +++ +++ btf_verifier_log_type(env, t, NULL); +++ +++ return 0; +++} +++ +++static void btf_fwd_type_log(struct btf_verifier_env *env, +++ const struct btf_type *t) +++{ +++ btf_verifier_log(env, "%s", btf_type_kflag(t) ? 
"union" : "struct"); +++} +++ +++static struct btf_kind_operations fwd_ops = { +++ .check_meta = btf_fwd_check_meta, +++ .resolve = btf_df_resolve, +++ .check_member = btf_df_check_member, +++ .check_kflag_member = btf_df_check_kflag_member, +++ .log_details = btf_fwd_type_log, +++ .seq_show = btf_df_seq_show, +++}; +++ +++static int btf_array_check_member(struct btf_verifier_env *env, +++ const struct btf_type *struct_type, +++ const struct btf_member *member, +++ const struct btf_type *member_type) +++{ +++ u32 struct_bits_off = member->offset; +++ u32 struct_size, bytes_offset; +++ u32 array_type_id, array_size; +++ struct btf *btf = env->btf; +++ +++ if (BITS_PER_BYTE_MASKED(struct_bits_off)) { +++ btf_verifier_log_member(env, struct_type, member, +++ "Member is not byte aligned"); +++ return -EINVAL; +++ } +++ +++ array_type_id = member->type; +++ btf_type_id_size(btf, &array_type_id, &array_size); +++ struct_size = struct_type->size; +++ bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); +++ if (struct_size - bytes_offset < array_size) { +++ btf_verifier_log_member(env, struct_type, member, +++ "Member exceeds struct_size"); +++ return -EINVAL; +++ } +++ +++ return 0; +++} +++ +++static s32 btf_array_check_meta(struct btf_verifier_env *env, +++ const struct btf_type *t, +++ u32 meta_left) +++{ +++ const struct btf_array *array = btf_type_array(t); +++ u32 meta_needed = sizeof(*array); +++ +++ if (meta_left < meta_needed) { +++ btf_verifier_log_basic(env, t, +++ "meta_left:%u meta_needed:%u", +++ meta_left, meta_needed); +++ return -EINVAL; +++ } +++ +++ /* array type should not have a name */ +++ if (t->name_off) { +++ btf_verifier_log_type(env, t, "Invalid name"); +++ return -EINVAL; +++ } +++ +++ if (btf_type_vlen(t)) { +++ btf_verifier_log_type(env, t, "vlen != 0"); +++ return -EINVAL; +++ } +++ +++ if (btf_type_kflag(t)) { +++ btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); +++ return -EINVAL; +++ } +++ +++ if (t->size) { +++ btf_verifier_log_type(env, t, "size != 0"); +++ return -EINVAL; +++ } +++ +++ /* Array elem type and index type cannot be in type void, +++ * so !array->type and !array->index_type are not allowed. 
+++ */ +++ if (!array->type || !BTF_TYPE_ID_VALID(array->type)) { +++ btf_verifier_log_type(env, t, "Invalid elem"); +++ return -EINVAL; +++ } +++ +++ if (!array->index_type || !BTF_TYPE_ID_VALID(array->index_type)) { +++ btf_verifier_log_type(env, t, "Invalid index"); +++ return -EINVAL; +++ } +++ +++ btf_verifier_log_type(env, t, NULL); +++ +++ return meta_needed; +++} +++ +++static int btf_array_resolve(struct btf_verifier_env *env, +++ const struct resolve_vertex *v) +++{ +++ const struct btf_array *array = btf_type_array(v->t); +++ const struct btf_type *elem_type, *index_type; +++ u32 elem_type_id, index_type_id; +++ struct btf *btf = env->btf; +++ u32 elem_size; +++ +++ /* Check array->index_type */ +++ index_type_id = array->index_type; +++ index_type = btf_type_by_id(btf, index_type_id); +++ if (btf_type_nosize_or_null(index_type) || +++ btf_type_is_resolve_source_only(index_type)) { +++ btf_verifier_log_type(env, v->t, "Invalid index"); +++ return -EINVAL; +++ } +++ +++ if (!env_type_is_resolve_sink(env, index_type) && +++ !env_type_is_resolved(env, index_type_id)) +++ return env_stack_push(env, index_type, index_type_id); +++ +++ index_type = btf_type_id_size(btf, &index_type_id, NULL); +++ if (!index_type || !btf_type_is_int(index_type) || +++ !btf_type_int_is_regular(index_type)) { +++ btf_verifier_log_type(env, v->t, "Invalid index"); +++ return -EINVAL; +++ } +++ +++ /* Check array->type */ +++ elem_type_id = array->type; +++ elem_type = btf_type_by_id(btf, elem_type_id); +++ if (btf_type_nosize_or_null(elem_type) || +++ btf_type_is_resolve_source_only(elem_type)) { +++ btf_verifier_log_type(env, v->t, +++ "Invalid elem"); +++ return -EINVAL; +++ } +++ +++ if (!env_type_is_resolve_sink(env, elem_type) && +++ !env_type_is_resolved(env, elem_type_id)) +++ return env_stack_push(env, elem_type, elem_type_id); +++ +++ elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size); +++ if (!elem_type) { +++ btf_verifier_log_type(env, v->t, "Invalid elem"); +++ return -EINVAL; +++ } +++ +++ if (btf_type_is_int(elem_type) && !btf_type_int_is_regular(elem_type)) { +++ btf_verifier_log_type(env, v->t, "Invalid array of int"); +++ return -EINVAL; +++ } +++ +++ if (array->nelems && elem_size > U32_MAX / array->nelems) { +++ btf_verifier_log_type(env, v->t, +++ "Array size overflows U32_MAX"); +++ return -EINVAL; +++ } +++ +++ env_stack_pop_resolved(env, elem_type_id, elem_size * array->nelems); +++ +++ return 0; +++} +++ +++static void btf_array_log(struct btf_verifier_env *env, +++ const struct btf_type *t) +++{ +++ const struct btf_array *array = btf_type_array(t); +++ +++ btf_verifier_log(env, "type_id=%u index_type_id=%u nr_elems=%u", +++ array->type, array->index_type, array->nelems); +++} +++ +++static void btf_array_seq_show(const struct btf *btf, const struct btf_type *t, +++ u32 type_id, void *data, u8 bits_offset, +++ struct seq_file *m) +++{ +++ const struct btf_array *array = btf_type_array(t); +++ const struct btf_kind_operations *elem_ops; +++ const struct btf_type *elem_type; +++ u32 i, elem_size, elem_type_id; +++ +++ elem_type_id = array->type; +++ elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size); +++ elem_ops = btf_type_ops(elem_type); +++ seq_puts(m, "["); +++ for (i = 0; i < array->nelems; i++) { +++ if (i) +++ seq_puts(m, ","); +++ +++ elem_ops->seq_show(btf, elem_type, elem_type_id, data, +++ bits_offset, m); +++ data += elem_size; +++ } +++ seq_puts(m, "]"); +++} +++ +++static struct btf_kind_operations array_ops = { +++ .check_meta = 
btf_array_check_meta, +++ .resolve = btf_array_resolve, +++ .check_member = btf_array_check_member, +++ .check_kflag_member = btf_generic_check_kflag_member, +++ .log_details = btf_array_log, +++ .seq_show = btf_array_seq_show, +++}; +++ +++static int btf_struct_check_member(struct btf_verifier_env *env, +++ const struct btf_type *struct_type, +++ const struct btf_member *member, +++ const struct btf_type *member_type) +++{ +++ u32 struct_bits_off = member->offset; +++ u32 struct_size, bytes_offset; +++ +++ if (BITS_PER_BYTE_MASKED(struct_bits_off)) { +++ btf_verifier_log_member(env, struct_type, member, +++ "Member is not byte aligned"); +++ return -EINVAL; +++ } +++ +++ struct_size = struct_type->size; +++ bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); +++ if (struct_size - bytes_offset < member_type->size) { +++ btf_verifier_log_member(env, struct_type, member, +++ "Member exceeds struct_size"); +++ return -EINVAL; +++ } +++ +++ return 0; +++} +++ +++static s32 btf_struct_check_meta(struct btf_verifier_env *env, +++ const struct btf_type *t, +++ u32 meta_left) +++{ +++ bool is_union = BTF_INFO_KIND(t->info) == BTF_KIND_UNION; +++ const struct btf_member *member; +++ u32 meta_needed, last_offset; +++ struct btf *btf = env->btf; +++ u32 struct_size = t->size; +++ u32 offset; +++ u16 i; +++ +++ meta_needed = btf_type_vlen(t) * sizeof(*member); +++ if (meta_left < meta_needed) { +++ btf_verifier_log_basic(env, t, +++ "meta_left:%u meta_needed:%u", +++ meta_left, meta_needed); +++ return -EINVAL; +++ } +++ +++ /* struct type either no name or a valid one */ +++ if (t->name_off && +++ !btf_name_valid_identifier(env->btf, t->name_off)) { +++ btf_verifier_log_type(env, t, "Invalid name"); +++ return -EINVAL; +++ } +++ +++ btf_verifier_log_type(env, t, NULL); +++ +++ last_offset = 0; +++ for_each_member(i, t, member) { +++ if (!btf_name_offset_valid(btf, member->name_off)) { +++ btf_verifier_log_member(env, t, member, +++ "Invalid member name_offset:%u", +++ member->name_off); +++ return -EINVAL; +++ } +++ +++ /* struct member either no name or a valid one */ +++ if (member->name_off && +++ !btf_name_valid_identifier(btf, member->name_off)) { +++ btf_verifier_log_member(env, t, member, "Invalid name"); +++ return -EINVAL; +++ } +++ /* A member cannot be in type void */ +++ if (!member->type || !BTF_TYPE_ID_VALID(member->type)) { +++ btf_verifier_log_member(env, t, member, +++ "Invalid type_id"); +++ return -EINVAL; +++ } +++ +++ offset = btf_member_bit_offset(t, member); +++ if (is_union && offset) { +++ btf_verifier_log_member(env, t, member, +++ "Invalid member bits_offset"); +++ return -EINVAL; +++ } +++ +++ /* +++ * ">" instead of ">=" because the last member could be +++ * "char a[0];" +++ */ +++ if (last_offset > offset) { +++ btf_verifier_log_member(env, t, member, +++ "Invalid member bits_offset"); +++ return -EINVAL; +++ } +++ +++ if (BITS_ROUNDUP_BYTES(offset) > struct_size) { +++ btf_verifier_log_member(env, t, member, +++ "Member bits_offset exceeds its struct size"); +++ return -EINVAL; +++ } +++ +++ btf_verifier_log_member(env, t, member, NULL); +++ last_offset = offset; +++ } +++ +++ return meta_needed; +++} +++ +++static int btf_struct_resolve(struct btf_verifier_env *env, +++ const struct resolve_vertex *v) +++{ +++ const struct btf_member *member; +++ int err; +++ u16 i; +++ +++ /* Before continue resolving the next_member, +++ * ensure the last member is indeed resolved to a +++ * type with size info. 
+++ */ +++ if (v->next_member) { +++ const struct btf_type *last_member_type; +++ const struct btf_member *last_member; +++ u16 last_member_type_id; +++ +++ last_member = btf_type_member(v->t) + v->next_member - 1; +++ last_member_type_id = last_member->type; +++ if (WARN_ON_ONCE(!env_type_is_resolved(env, +++ last_member_type_id))) +++ return -EINVAL; +++ +++ last_member_type = btf_type_by_id(env->btf, +++ last_member_type_id); +++ if (btf_type_kflag(v->t)) +++ err = btf_type_ops(last_member_type)->check_kflag_member(env, v->t, +++ last_member, +++ last_member_type); +++ else +++ err = btf_type_ops(last_member_type)->check_member(env, v->t, +++ last_member, +++ last_member_type); +++ if (err) +++ return err; +++ } +++ +++ for_each_member_from(i, v->next_member, v->t, member) { +++ u32 member_type_id = member->type; +++ const struct btf_type *member_type = btf_type_by_id(env->btf, +++ member_type_id); +++ +++ if (btf_type_nosize_or_null(member_type) || +++ btf_type_is_resolve_source_only(member_type)) { +++ btf_verifier_log_member(env, v->t, member, +++ "Invalid member"); +++ return -EINVAL; +++ } +++ +++ if (!env_type_is_resolve_sink(env, member_type) && +++ !env_type_is_resolved(env, member_type_id)) { +++ env_stack_set_next_member(env, i + 1); +++ return env_stack_push(env, member_type, member_type_id); +++ } +++ +++ if (btf_type_kflag(v->t)) +++ err = btf_type_ops(member_type)->check_kflag_member(env, v->t, +++ member, +++ member_type); +++ else +++ err = btf_type_ops(member_type)->check_member(env, v->t, +++ member, +++ member_type); +++ if (err) +++ return err; +++ } +++ +++ env_stack_pop_resolved(env, 0, 0); +++ +++ return 0; +++} +++ +++static void btf_struct_log(struct btf_verifier_env *env, +++ const struct btf_type *t) +++{ +++ btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); +++} +++ +++/* find 'struct bpf_spin_lock' in map value. +++ * return >= 0 offset if found +++ * and < 0 in case of error +++ */ +++int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t) +++{ +++ const struct btf_member *member; +++ u32 i, off = -ENOENT; +++ +++ if (!__btf_type_is_struct(t)) +++ return -EINVAL; +++ +++ for_each_member(i, t, member) { +++ const struct btf_type *member_type = btf_type_by_id(btf, +++ member->type); +++ if (!__btf_type_is_struct(member_type)) +++ continue; +++ if (member_type->size != sizeof(struct bpf_spin_lock)) +++ continue; +++ if (strcmp(__btf_name_by_offset(btf, member_type->name_off), +++ "bpf_spin_lock")) +++ continue; +++ if (off != -ENOENT) +++ /* only one 'struct bpf_spin_lock' is allowed */ +++ return -E2BIG; +++ off = btf_member_bit_offset(t, member); +++ if (off % 8) +++ /* valid C code cannot generate such BTF */ +++ return -EINVAL; +++ off /= 8; +++ if (off % __alignof__(struct bpf_spin_lock)) +++ /* valid struct bpf_spin_lock will be 4 byte aligned */ +++ return -EINVAL; +++ } +++ return off; +++} +++ +++static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t, +++ u32 type_id, void *data, u8 bits_offset, +++ struct seq_file *m) +++{ +++ const char *seq = BTF_INFO_KIND(t->info) == BTF_KIND_UNION ? 
"|" : ","; +++ const struct btf_member *member; +++ u32 i; +++ +++ seq_puts(m, "{"); +++ for_each_member(i, t, member) { +++ const struct btf_type *member_type = btf_type_by_id(btf, +++ member->type); +++ const struct btf_kind_operations *ops; +++ u32 member_offset, bitfield_size; +++ u32 bytes_offset; +++ u8 bits8_offset; +++ +++ if (i) +++ seq_puts(m, seq); +++ +++ member_offset = btf_member_bit_offset(t, member); +++ bitfield_size = btf_member_bitfield_size(t, member); +++ bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset); +++ bits8_offset = BITS_PER_BYTE_MASKED(member_offset); +++ if (bitfield_size) { +++ btf_bitfield_seq_show(data + bytes_offset, bits8_offset, +++ bitfield_size, m); +++ } else { +++ ops = btf_type_ops(member_type); +++ ops->seq_show(btf, member_type, member->type, +++ data + bytes_offset, bits8_offset, m); +++ } +++ } +++ seq_puts(m, "}"); +++} +++ +++static struct btf_kind_operations struct_ops = { +++ .check_meta = btf_struct_check_meta, +++ .resolve = btf_struct_resolve, +++ .check_member = btf_struct_check_member, +++ .check_kflag_member = btf_generic_check_kflag_member, +++ .log_details = btf_struct_log, +++ .seq_show = btf_struct_seq_show, +++}; +++ +++static int btf_enum_check_member(struct btf_verifier_env *env, +++ const struct btf_type *struct_type, +++ const struct btf_member *member, +++ const struct btf_type *member_type) +++{ +++ u32 struct_bits_off = member->offset; +++ u32 struct_size, bytes_offset; +++ +++ if (BITS_PER_BYTE_MASKED(struct_bits_off)) { +++ btf_verifier_log_member(env, struct_type, member, +++ "Member is not byte aligned"); +++ return -EINVAL; +++ } +++ +++ struct_size = struct_type->size; +++ bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); +++ if (struct_size - bytes_offset < member_type->size) { +++ btf_verifier_log_member(env, struct_type, member, +++ "Member exceeds struct_size"); +++ return -EINVAL; +++ } +++ +++ return 0; +++} +++ +++static int btf_enum_check_kflag_member(struct btf_verifier_env *env, +++ const struct btf_type *struct_type, +++ const struct btf_member *member, +++ const struct btf_type *member_type) +++{ +++ u32 struct_bits_off, nr_bits, bytes_end, struct_size; +++ u32 int_bitsize = sizeof(int) * BITS_PER_BYTE; +++ +++ struct_bits_off = BTF_MEMBER_BIT_OFFSET(member->offset); +++ nr_bits = BTF_MEMBER_BITFIELD_SIZE(member->offset); +++ if (!nr_bits) { +++ if (BITS_PER_BYTE_MASKED(struct_bits_off)) { +++ btf_verifier_log_member(env, struct_type, member, +++ "Member is not byte aligned"); +++ return -EINVAL; +++ } +++ +++ nr_bits = int_bitsize; +++ } else if (nr_bits > int_bitsize) { +++ btf_verifier_log_member(env, struct_type, member, +++ "Invalid member bitfield_size"); +++ return -EINVAL; +++ } +++ +++ struct_size = struct_type->size; +++ bytes_end = BITS_ROUNDUP_BYTES(struct_bits_off + nr_bits); +++ if (struct_size < bytes_end) { +++ btf_verifier_log_member(env, struct_type, member, +++ "Member exceeds struct_size"); +++ return -EINVAL; +++ } +++ +++ return 0; +++} +++ +++static s32 btf_enum_check_meta(struct btf_verifier_env *env, +++ const struct btf_type *t, +++ u32 meta_left) +++{ +++ const struct btf_enum *enums = btf_type_enum(t); +++ struct btf *btf = env->btf; +++ u16 i, nr_enums; +++ u32 meta_needed; +++ +++ nr_enums = btf_type_vlen(t); +++ meta_needed = nr_enums * sizeof(*enums); +++ +++ if (meta_left < meta_needed) { +++ btf_verifier_log_basic(env, t, +++ "meta_left:%u meta_needed:%u", +++ meta_left, meta_needed); +++ return -EINVAL; +++ } +++ +++ if (btf_type_kflag(t)) { +++ 
btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); +++ return -EINVAL; +++ } +++ +++ if (t->size > 8 || !is_power_of_2(t->size)) { +++ btf_verifier_log_type(env, t, "Unexpected size"); +++ return -EINVAL; +++ } +++ +++ /* enum type either no name or a valid one */ +++ if (t->name_off && +++ !btf_name_valid_identifier(env->btf, t->name_off)) { +++ btf_verifier_log_type(env, t, "Invalid name"); +++ return -EINVAL; +++ } +++ +++ btf_verifier_log_type(env, t, NULL); +++ +++ for (i = 0; i < nr_enums; i++) { +++ if (!btf_name_offset_valid(btf, enums[i].name_off)) { +++ btf_verifier_log(env, "\tInvalid name_offset:%u", +++ enums[i].name_off); +++ return -EINVAL; +++ } +++ +++ /* enum member must have a valid name */ +++ if (!enums[i].name_off || +++ !btf_name_valid_identifier(btf, enums[i].name_off)) { +++ btf_verifier_log_type(env, t, "Invalid name"); +++ return -EINVAL; +++ } +++ +++ +++ btf_verifier_log(env, "\t%s val=%d\n", +++ __btf_name_by_offset(btf, enums[i].name_off), +++ enums[i].val); +++ } +++ +++ return meta_needed; +++} +++ +++static void btf_enum_log(struct btf_verifier_env *env, +++ const struct btf_type *t) +++{ +++ btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); +++} +++ +++static void btf_enum_seq_show(const struct btf *btf, const struct btf_type *t, +++ u32 type_id, void *data, u8 bits_offset, +++ struct seq_file *m) +++{ +++ const struct btf_enum *enums = btf_type_enum(t); +++ u32 i, nr_enums = btf_type_vlen(t); +++ int v = *(int *)data; +++ +++ for (i = 0; i < nr_enums; i++) { +++ if (v == enums[i].val) { +++ seq_printf(m, "%s", +++ __btf_name_by_offset(btf, +++ enums[i].name_off)); +++ return; +++ } +++ } +++ +++ seq_printf(m, "%d", v); +++} +++ +++static struct btf_kind_operations enum_ops = { +++ .check_meta = btf_enum_check_meta, +++ .resolve = btf_df_resolve, +++ .check_member = btf_enum_check_member, +++ .check_kflag_member = btf_enum_check_kflag_member, +++ .log_details = btf_enum_log, +++ .seq_show = btf_enum_seq_show, +++}; +++ +++static s32 btf_func_proto_check_meta(struct btf_verifier_env *env, +++ const struct btf_type *t, +++ u32 meta_left) +++{ +++ u32 meta_needed = btf_type_vlen(t) * sizeof(struct btf_param); +++ +++ if (meta_left < meta_needed) { +++ btf_verifier_log_basic(env, t, +++ "meta_left:%u meta_needed:%u", +++ meta_left, meta_needed); +++ return -EINVAL; +++ } +++ +++ if (t->name_off) { +++ btf_verifier_log_type(env, t, "Invalid name"); +++ return -EINVAL; +++ } +++ +++ if (btf_type_kflag(t)) { +++ btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); +++ return -EINVAL; +++ } +++ +++ btf_verifier_log_type(env, t, NULL); +++ +++ return meta_needed; +++} +++ +++static void btf_func_proto_log(struct btf_verifier_env *env, +++ const struct btf_type *t) +++{ +++ const struct btf_param *args = (const struct btf_param *)(t + 1); +++ u16 nr_args = btf_type_vlen(t), i; +++ +++ btf_verifier_log(env, "return=%u args=(", t->type); +++ if (!nr_args) { +++ btf_verifier_log(env, "void"); +++ goto done; +++ } +++ +++ if (nr_args == 1 && !args[0].type) { +++ /* Only one vararg */ +++ btf_verifier_log(env, "vararg"); +++ goto done; +++ } +++ +++ btf_verifier_log(env, "%u %s", args[0].type, +++ __btf_name_by_offset(env->btf, +++ args[0].name_off)); +++ for (i = 1; i < nr_args - 1; i++) +++ btf_verifier_log(env, ", %u %s", args[i].type, +++ __btf_name_by_offset(env->btf, +++ args[i].name_off)); +++ +++ if (nr_args > 1) { +++ const struct btf_param *last_arg = &args[nr_args - 1]; +++ +++ if (last_arg->type) +++ btf_verifier_log(env, 
", %u %s", last_arg->type, +++ __btf_name_by_offset(env->btf, +++ last_arg->name_off)); +++ else +++ btf_verifier_log(env, ", vararg"); +++ } +++ +++done: +++ btf_verifier_log(env, ")"); +++} +++ +++static struct btf_kind_operations func_proto_ops = { +++ .check_meta = btf_func_proto_check_meta, +++ .resolve = btf_df_resolve, +++ /* +++ * BTF_KIND_FUNC_PROTO cannot be directly referred by +++ * a struct's member. +++ * +++ * It should be a funciton pointer instead. +++ * (i.e. struct's member -> BTF_KIND_PTR -> BTF_KIND_FUNC_PROTO) +++ * +++ * Hence, there is no btf_func_check_member(). +++ */ +++ .check_member = btf_df_check_member, +++ .check_kflag_member = btf_df_check_kflag_member, +++ .log_details = btf_func_proto_log, +++ .seq_show = btf_df_seq_show, +++}; +++ +++static s32 btf_func_check_meta(struct btf_verifier_env *env, +++ const struct btf_type *t, +++ u32 meta_left) +++{ +++ if (!t->name_off || +++ !btf_name_valid_identifier(env->btf, t->name_off)) { +++ btf_verifier_log_type(env, t, "Invalid name"); +++ return -EINVAL; +++ } +++ +++ if (btf_type_vlen(t)) { +++ btf_verifier_log_type(env, t, "vlen != 0"); +++ return -EINVAL; +++ } +++ +++ if (btf_type_kflag(t)) { +++ btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); +++ return -EINVAL; +++ } +++ +++ btf_verifier_log_type(env, t, NULL); +++ +++ return 0; +++} +++ +++static struct btf_kind_operations func_ops = { +++ .check_meta = btf_func_check_meta, +++ .resolve = btf_df_resolve, +++ .check_member = btf_df_check_member, +++ .check_kflag_member = btf_df_check_kflag_member, +++ .log_details = btf_ref_type_log, +++ .seq_show = btf_df_seq_show, +++}; +++ +++static s32 btf_var_check_meta(struct btf_verifier_env *env, +++ const struct btf_type *t, +++ u32 meta_left) +++{ +++ const struct btf_var *var; +++ u32 meta_needed = sizeof(*var); +++ +++ if (meta_left < meta_needed) { +++ btf_verifier_log_basic(env, t, +++ "meta_left:%u meta_needed:%u", +++ meta_left, meta_needed); +++ return -EINVAL; +++ } +++ +++ if (btf_type_vlen(t)) { +++ btf_verifier_log_type(env, t, "vlen != 0"); +++ return -EINVAL; +++ } +++ +++ if (btf_type_kflag(t)) { +++ btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); +++ return -EINVAL; +++ } +++ +++ if (!t->name_off || +++ !__btf_name_valid(env->btf, t->name_off, true)) { +++ btf_verifier_log_type(env, t, "Invalid name"); +++ return -EINVAL; +++ } +++ +++ /* A var cannot be in type void */ +++ if (!t->type || !BTF_TYPE_ID_VALID(t->type)) { +++ btf_verifier_log_type(env, t, "Invalid type_id"); +++ return -EINVAL; +++ } +++ +++ var = btf_type_var(t); +++ if (var->linkage != BTF_VAR_STATIC && +++ var->linkage != BTF_VAR_GLOBAL_ALLOCATED) { +++ btf_verifier_log_type(env, t, "Linkage not supported"); +++ return -EINVAL; +++ } +++ +++ btf_verifier_log_type(env, t, NULL); +++ +++ return meta_needed; +++} +++ +++static void btf_var_log(struct btf_verifier_env *env, const struct btf_type *t) +++{ +++ const struct btf_var *var = btf_type_var(t); +++ +++ btf_verifier_log(env, "type_id=%u linkage=%u", t->type, var->linkage); +++} +++ +++static const struct btf_kind_operations var_ops = { +++ .check_meta = btf_var_check_meta, +++ .resolve = btf_var_resolve, +++ .check_member = btf_df_check_member, +++ .check_kflag_member = btf_df_check_kflag_member, +++ .log_details = btf_var_log, +++ .seq_show = btf_var_seq_show, +++}; +++ +++static s32 btf_datasec_check_meta(struct btf_verifier_env *env, +++ const struct btf_type *t, +++ u32 meta_left) +++{ +++ const struct btf_var_secinfo *vsi; +++ u64 last_vsi_end_off 
= 0, sum = 0; +++ u32 i, meta_needed; +++ +++ meta_needed = btf_type_vlen(t) * sizeof(*vsi); +++ if (meta_left < meta_needed) { +++ btf_verifier_log_basic(env, t, +++ "meta_left:%u meta_needed:%u", +++ meta_left, meta_needed); +++ return -EINVAL; +++ } +++ +++ if (!btf_type_vlen(t)) { +++ btf_verifier_log_type(env, t, "vlen == 0"); +++ return -EINVAL; +++ } +++ +++ if (!t->size) { +++ btf_verifier_log_type(env, t, "size == 0"); +++ return -EINVAL; +++ } +++ +++ if (btf_type_kflag(t)) { +++ btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); +++ return -EINVAL; +++ } +++ +++ if (!t->name_off || +++ !btf_name_valid_section(env->btf, t->name_off)) { +++ btf_verifier_log_type(env, t, "Invalid name"); +++ return -EINVAL; +++ } +++ +++ btf_verifier_log_type(env, t, NULL); +++ +++ for_each_vsi(i, t, vsi) { +++ /* A var cannot be in type void */ +++ if (!vsi->type || !BTF_TYPE_ID_VALID(vsi->type)) { +++ btf_verifier_log_vsi(env, t, vsi, +++ "Invalid type_id"); +++ return -EINVAL; +++ } +++ +++ if (vsi->offset < last_vsi_end_off || vsi->offset >= t->size) { +++ btf_verifier_log_vsi(env, t, vsi, +++ "Invalid offset"); +++ return -EINVAL; +++ } +++ +++ if (!vsi->size || vsi->size > t->size) { +++ btf_verifier_log_vsi(env, t, vsi, +++ "Invalid size"); +++ return -EINVAL; +++ } +++ +++ last_vsi_end_off = vsi->offset + vsi->size; +++ if (last_vsi_end_off > t->size) { +++ btf_verifier_log_vsi(env, t, vsi, +++ "Invalid offset+size"); +++ return -EINVAL; +++ } +++ +++ btf_verifier_log_vsi(env, t, vsi, NULL); +++ sum += vsi->size; +++ } +++ +++ if (t->size < sum) { +++ btf_verifier_log_type(env, t, "Invalid btf_info size"); +++ return -EINVAL; +++ } +++ +++ return meta_needed; +++} +++ +++static int btf_datasec_resolve(struct btf_verifier_env *env, +++ const struct resolve_vertex *v) +++{ +++ const struct btf_var_secinfo *vsi; +++ struct btf *btf = env->btf; +++ u16 i; +++ +++ for_each_vsi_from(i, v->next_member, v->t, vsi) { +++ u32 var_type_id = vsi->type, type_id, type_size = 0; +++ const struct btf_type *var_type = btf_type_by_id(env->btf, +++ var_type_id); +++ if (!var_type || !btf_type_is_var(var_type)) { +++ btf_verifier_log_vsi(env, v->t, vsi, +++ "Not a VAR kind member"); +++ return -EINVAL; +++ } +++ +++ if (!env_type_is_resolve_sink(env, var_type) && +++ !env_type_is_resolved(env, var_type_id)) { +++ env_stack_set_next_member(env, i + 1); +++ return env_stack_push(env, var_type, var_type_id); +++ } +++ +++ type_id = var_type->type; +++ if (!btf_type_id_size(btf, &type_id, &type_size)) { +++ btf_verifier_log_vsi(env, v->t, vsi, "Invalid type"); +++ return -EINVAL; +++ } +++ +++ if (vsi->size < type_size) { +++ btf_verifier_log_vsi(env, v->t, vsi, "Invalid size"); +++ return -EINVAL; +++ } +++ } +++ +++ env_stack_pop_resolved(env, 0, 0); +++ return 0; +++} +++ +++static void btf_datasec_log(struct btf_verifier_env *env, +++ const struct btf_type *t) +++{ +++ btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); +++} +++ +++static void btf_datasec_seq_show(const struct btf *btf, +++ const struct btf_type *t, u32 type_id, +++ void *data, u8 bits_offset, +++ struct seq_file *m) +++{ +++ const struct btf_var_secinfo *vsi; +++ const struct btf_type *var; +++ u32 i; +++ +++ seq_printf(m, "section (\"%s\") = {", __btf_name_by_offset(btf, t->name_off)); +++ for_each_vsi(i, t, vsi) { +++ var = btf_type_by_id(btf, vsi->type); +++ if (i) +++ seq_puts(m, ","); +++ btf_type_ops(var)->seq_show(btf, var, vsi->type, +++ data + vsi->offset, bits_offset, m); +++ } +++ seq_puts(m, "}"); +++} 
+++ +++static const struct btf_kind_operations datasec_ops = { +++ .check_meta = btf_datasec_check_meta, +++ .resolve = btf_datasec_resolve, +++ .check_member = btf_df_check_member, +++ .check_kflag_member = btf_df_check_kflag_member, +++ .log_details = btf_datasec_log, +++ .seq_show = btf_datasec_seq_show, +++}; +++ +++static int btf_func_proto_check(struct btf_verifier_env *env, +++ const struct btf_type *t) +++{ +++ const struct btf_type *ret_type; +++ const struct btf_param *args; +++ const struct btf *btf; +++ u16 nr_args, i; +++ int err; +++ +++ btf = env->btf; +++ args = (const struct btf_param *)(t + 1); +++ nr_args = btf_type_vlen(t); +++ +++ /* Check func return type which could be "void" (t->type == 0) */ +++ if (t->type) { +++ u32 ret_type_id = t->type; +++ +++ ret_type = btf_type_by_id(btf, ret_type_id); +++ if (!ret_type) { +++ btf_verifier_log_type(env, t, "Invalid return type"); +++ return -EINVAL; +++ } +++ +++ if (btf_type_needs_resolve(ret_type) && +++ !env_type_is_resolved(env, ret_type_id)) { +++ err = btf_resolve(env, ret_type, ret_type_id); +++ if (err) +++ return err; +++ } +++ +++ /* Ensure the return type is a type that has a size */ +++ if (!btf_type_id_size(btf, &ret_type_id, NULL)) { +++ btf_verifier_log_type(env, t, "Invalid return type"); +++ return -EINVAL; +++ } +++ } +++ +++ if (!nr_args) +++ return 0; +++ +++ /* Last func arg type_id could be 0 if it is a vararg */ +++ if (!args[nr_args - 1].type) { +++ if (args[nr_args - 1].name_off) { +++ btf_verifier_log_type(env, t, "Invalid arg#%u", +++ nr_args); +++ return -EINVAL; +++ } +++ nr_args--; +++ } +++ +++ err = 0; +++ for (i = 0; i < nr_args; i++) { +++ const struct btf_type *arg_type; +++ u32 arg_type_id; +++ +++ arg_type_id = args[i].type; +++ arg_type = btf_type_by_id(btf, arg_type_id); +++ if (!arg_type) { +++ btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); +++ err = -EINVAL; +++ break; +++ } +++ +++ if (args[i].name_off && +++ (!btf_name_offset_valid(btf, args[i].name_off) || +++ !btf_name_valid_identifier(btf, args[i].name_off))) { +++ btf_verifier_log_type(env, t, +++ "Invalid arg#%u", i + 1); +++ err = -EINVAL; +++ break; +++ } +++ +++ if (btf_type_needs_resolve(arg_type) && +++ !env_type_is_resolved(env, arg_type_id)) { +++ err = btf_resolve(env, arg_type, arg_type_id); +++ if (err) +++ break; +++ } +++ +++ if (!btf_type_id_size(btf, &arg_type_id, NULL)) { +++ btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); +++ err = -EINVAL; +++ break; +++ } +++ } +++ +++ return err; +++} +++ +++static int btf_func_check(struct btf_verifier_env *env, +++ const struct btf_type *t) +++{ +++ const struct btf_type *proto_type; +++ const struct btf_param *args; +++ const struct btf *btf; +++ u16 nr_args, i; +++ +++ btf = env->btf; +++ proto_type = btf_type_by_id(btf, t->type); +++ +++ if (!proto_type || !btf_type_is_func_proto(proto_type)) { +++ btf_verifier_log_type(env, t, "Invalid type_id"); +++ return -EINVAL; +++ } +++ +++ args = (const struct btf_param *)(proto_type + 1); +++ nr_args = btf_type_vlen(proto_type); +++ for (i = 0; i < nr_args; i++) { +++ if (!args[i].name_off && args[i].type) { +++ btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); +++ return -EINVAL; +++ } +++ } +++ +++ return 0; +++} +++ +++static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = { +++ [BTF_KIND_INT] = &int_ops, +++ [BTF_KIND_PTR] = &ptr_ops, +++ [BTF_KIND_ARRAY] = &array_ops, +++ [BTF_KIND_STRUCT] = &struct_ops, +++ [BTF_KIND_UNION] = &struct_ops, +++ [BTF_KIND_ENUM] = &enum_ops, +++ 
[BTF_KIND_FWD] = &fwd_ops, +++ [BTF_KIND_TYPEDEF] = &modifier_ops, +++ [BTF_KIND_VOLATILE] = &modifier_ops, +++ [BTF_KIND_CONST] = &modifier_ops, +++ [BTF_KIND_RESTRICT] = &modifier_ops, +++ [BTF_KIND_FUNC] = &func_ops, +++ [BTF_KIND_FUNC_PROTO] = &func_proto_ops, +++ [BTF_KIND_VAR] = &var_ops, +++ [BTF_KIND_DATASEC] = &datasec_ops, +++}; +++ +++static s32 btf_check_meta(struct btf_verifier_env *env, +++ const struct btf_type *t, +++ u32 meta_left) +++{ +++ u32 saved_meta_left = meta_left; +++ s32 var_meta_size; +++ +++ if (meta_left < sizeof(*t)) { +++ btf_verifier_log(env, "[%u] meta_left:%u meta_needed:%zu", +++ env->log_type_id, meta_left, sizeof(*t)); +++ return -EINVAL; +++ } +++ meta_left -= sizeof(*t); +++ +++ if (t->info & ~BTF_INFO_MASK) { +++ btf_verifier_log(env, "[%u] Invalid btf_info:%x", +++ env->log_type_id, t->info); +++ return -EINVAL; +++ } +++ +++ if (BTF_INFO_KIND(t->info) > BTF_KIND_MAX || +++ BTF_INFO_KIND(t->info) == BTF_KIND_UNKN) { +++ btf_verifier_log(env, "[%u] Invalid kind:%u", +++ env->log_type_id, BTF_INFO_KIND(t->info)); +++ return -EINVAL; +++ } +++ +++ if (!btf_name_offset_valid(env->btf, t->name_off)) { +++ btf_verifier_log(env, "[%u] Invalid name_offset:%u", +++ env->log_type_id, t->name_off); +++ return -EINVAL; +++ } +++ +++ var_meta_size = btf_type_ops(t)->check_meta(env, t, meta_left); +++ if (var_meta_size < 0) +++ return var_meta_size; +++ +++ meta_left -= var_meta_size; +++ +++ return saved_meta_left - meta_left; +++} +++ +++static int btf_check_all_metas(struct btf_verifier_env *env) +++{ +++ struct btf *btf = env->btf; +++ struct btf_header *hdr; +++ void *cur, *end; +++ +++ hdr = &btf->hdr; +++ cur = btf->nohdr_data + hdr->type_off; +++ end = cur + hdr->type_len; +++ +++ env->log_type_id = 1; +++ while (cur < end) { +++ struct btf_type *t = cur; +++ s32 meta_size; +++ +++ meta_size = btf_check_meta(env, t, end - cur); +++ if (meta_size < 0) +++ return meta_size; +++ +++ btf_add_type(env, t); +++ cur += meta_size; +++ env->log_type_id++; +++ } +++ +++ return 0; +++} +++ +++static bool btf_resolve_valid(struct btf_verifier_env *env, +++ const struct btf_type *t, +++ u32 type_id) +++{ +++ struct btf *btf = env->btf; +++ +++ if (!env_type_is_resolved(env, type_id)) +++ return false; +++ +++ if (btf_type_is_struct(t) || btf_type_is_datasec(t)) +++ return !btf->resolved_ids[type_id] && +++ !btf->resolved_sizes[type_id]; +++ +++ if (btf_type_is_modifier(t) || btf_type_is_ptr(t) || +++ btf_type_is_var(t)) { +++ t = btf_type_id_resolve(btf, &type_id); +++ return t && +++ !btf_type_is_modifier(t) && +++ !btf_type_is_var(t) && +++ !btf_type_is_datasec(t); +++ } +++ +++ if (btf_type_is_array(t)) { +++ const struct btf_array *array = btf_type_array(t); +++ const struct btf_type *elem_type; +++ u32 elem_type_id = array->type; +++ u32 elem_size; +++ +++ elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size); +++ return elem_type && !btf_type_is_modifier(elem_type) && +++ (array->nelems * elem_size == +++ btf->resolved_sizes[type_id]); +++ } +++ +++ return false; +++} +++ +++static int btf_resolve(struct btf_verifier_env *env, +++ const struct btf_type *t, u32 type_id) +++{ +++ u32 save_log_type_id = env->log_type_id; +++ const struct resolve_vertex *v; +++ int err = 0; +++ +++ env->resolve_mode = RESOLVE_TBD; +++ env_stack_push(env, t, type_id); +++ while (!err && (v = env_stack_peak(env))) { +++ env->log_type_id = v->type_id; +++ err = btf_type_ops(v->t)->resolve(env, v); +++ } +++ +++ env->log_type_id = type_id; +++ if (err == -E2BIG) { +++ 
btf_verifier_log_type(env, t, +++ "Exceeded max resolving depth:%u", +++ MAX_RESOLVE_DEPTH); +++ } else if (err == -EEXIST) { +++ btf_verifier_log_type(env, t, "Loop detected"); +++ } +++ +++ /* Final sanity check */ +++ if (!err && !btf_resolve_valid(env, t, type_id)) { +++ btf_verifier_log_type(env, t, "Invalid resolve state"); +++ err = -EINVAL; +++ } +++ +++ env->log_type_id = save_log_type_id; +++ return err; +++} +++ +++static int btf_check_all_types(struct btf_verifier_env *env) +++{ +++ struct btf *btf = env->btf; +++ u32 type_id; +++ int err; +++ +++ err = env_resolve_init(env); +++ if (err) +++ return err; +++ +++ env->phase++; +++ for (type_id = 1; type_id <= btf->nr_types; type_id++) { +++ const struct btf_type *t = btf_type_by_id(btf, type_id); +++ +++ env->log_type_id = type_id; +++ if (btf_type_needs_resolve(t) && +++ !env_type_is_resolved(env, type_id)) { +++ err = btf_resolve(env, t, type_id); +++ if (err) +++ return err; +++ } +++ +++ if (btf_type_is_func_proto(t)) { +++ err = btf_func_proto_check(env, t); +++ if (err) +++ return err; +++ } +++ +++ if (btf_type_is_func(t)) { +++ err = btf_func_check(env, t); +++ if (err) +++ return err; +++ } +++ } +++ +++ return 0; +++} +++ +++static int btf_parse_type_sec(struct btf_verifier_env *env) +++{ +++ const struct btf_header *hdr = &env->btf->hdr; +++ int err; +++ +++ /* Type section must align to 4 bytes */ +++ if (hdr->type_off & (sizeof(u32) - 1)) { +++ btf_verifier_log(env, "Unaligned type_off"); +++ return -EINVAL; +++ } +++ +++ if (!hdr->type_len) { +++ btf_verifier_log(env, "No type found"); +++ return -EINVAL; +++ } +++ +++ err = btf_check_all_metas(env); +++ if (err) +++ return err; +++ +++ return btf_check_all_types(env); +++} +++ +++static int btf_parse_str_sec(struct btf_verifier_env *env) +++{ +++ const struct btf_header *hdr; +++ struct btf *btf = env->btf; +++ const char *start, *end; +++ +++ hdr = &btf->hdr; +++ start = btf->nohdr_data + hdr->str_off; +++ end = start + hdr->str_len; +++ +++ if (end != btf->data + btf->data_size) { +++ btf_verifier_log(env, "String section is not at the end"); +++ return -EINVAL; +++ } +++ +++ if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || +++ start[0] || end[-1]) { +++ btf_verifier_log(env, "Invalid string section"); +++ return -EINVAL; +++ } +++ +++ btf->strings = start; +++ +++ return 0; +++} +++ +++static const size_t btf_sec_info_offset[] = { +++ offsetof(struct btf_header, type_off), +++ offsetof(struct btf_header, str_off), +++}; +++ +++static int btf_sec_info_cmp(const void *a, const void *b) +++{ +++ const struct btf_sec_info *x = a; +++ const struct btf_sec_info *y = b; +++ +++ return (int)(x->off - y->off) ? 
: (int)(x->len - y->len); +++} +++ +++static int btf_check_sec_info(struct btf_verifier_env *env, +++ u32 btf_data_size) +++{ +++ struct btf_sec_info secs[ARRAY_SIZE(btf_sec_info_offset)]; +++ u32 total, expected_total, i; +++ const struct btf_header *hdr; +++ const struct btf *btf; +++ +++ btf = env->btf; +++ hdr = &btf->hdr; +++ +++ /* Populate the secs from hdr */ +++ for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++) +++ secs[i] = *(struct btf_sec_info *)((void *)hdr + +++ btf_sec_info_offset[i]); +++ +++ sort(secs, ARRAY_SIZE(btf_sec_info_offset), +++ sizeof(struct btf_sec_info), btf_sec_info_cmp, NULL); +++ +++ /* Check for gaps and overlap among sections */ +++ total = 0; +++ expected_total = btf_data_size - hdr->hdr_len; +++ for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++) { +++ if (expected_total < secs[i].off) { +++ btf_verifier_log(env, "Invalid section offset"); +++ return -EINVAL; +++ } +++ if (total < secs[i].off) { +++ /* gap */ +++ btf_verifier_log(env, "Unsupported section found"); +++ return -EINVAL; +++ } +++ if (total > secs[i].off) { +++ btf_verifier_log(env, "Section overlap found"); +++ return -EINVAL; +++ } +++ if (expected_total - total < secs[i].len) { +++ btf_verifier_log(env, +++ "Total section length too long"); +++ return -EINVAL; +++ } +++ total += secs[i].len; +++ } +++ +++ /* There is data other than hdr and known sections */ +++ if (expected_total != total) { +++ btf_verifier_log(env, "Unsupported section found"); +++ return -EINVAL; +++ } +++ +++ return 0; +++} +++ +++static int btf_parse_hdr(struct btf_verifier_env *env) +++{ +++ u32 hdr_len, hdr_copy, btf_data_size; +++ const struct btf_header *hdr; +++ struct btf *btf; +++ int err; +++ +++ btf = env->btf; +++ btf_data_size = btf->data_size; +++ +++ if (btf_data_size < +++ offsetof(struct btf_header, hdr_len) + sizeof(hdr->hdr_len)) { +++ btf_verifier_log(env, "hdr_len not found"); +++ return -EINVAL; +++ } +++ +++ hdr = btf->data; +++ hdr_len = hdr->hdr_len; +++ if (btf_data_size < hdr_len) { +++ btf_verifier_log(env, "btf_header not found"); +++ return -EINVAL; +++ } +++ +++ /* Ensure the unsupported header fields are zero */ +++ if (hdr_len > sizeof(btf->hdr)) { +++ u8 *expected_zero = btf->data + sizeof(btf->hdr); +++ u8 *end = btf->data + hdr_len; +++ +++ for (; expected_zero < end; expected_zero++) { +++ if (*expected_zero) { +++ btf_verifier_log(env, "Unsupported btf_header"); +++ return -E2BIG; +++ } +++ } +++ } +++ +++ hdr_copy = min_t(u32, hdr_len, sizeof(btf->hdr)); +++ memcpy(&btf->hdr, btf->data, hdr_copy); +++ +++ hdr = &btf->hdr; +++ +++ btf_verifier_log_hdr(env, btf_data_size); +++ +++ if (hdr->magic != BTF_MAGIC) { +++ btf_verifier_log(env, "Invalid magic"); +++ return -EINVAL; +++ } +++ +++ if (hdr->version != BTF_VERSION) { +++ btf_verifier_log(env, "Unsupported version"); +++ return -ENOTSUPP; +++ } +++ +++ if (hdr->flags) { +++ btf_verifier_log(env, "Unsupported flags"); +++ return -ENOTSUPP; +++ } +++ +++ if (btf_data_size == hdr->hdr_len) { +++ btf_verifier_log(env, "No data"); +++ return -EINVAL; +++ } +++ +++ err = btf_check_sec_info(env, btf_data_size); +++ if (err) +++ return err; +++ +++ return 0; +++} +++ +++static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size, +++ u32 log_level, char __user *log_ubuf, u32 log_size) +++{ +++ struct btf_verifier_env *env = NULL; +++ struct bpf_verifier_log *log; +++ struct btf *btf = NULL; +++ u8 *data; +++ int err; +++ +++ if (btf_data_size > BTF_MAX_SIZE) +++ return ERR_PTR(-E2BIG); +++ +++ env = 
kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); +++ if (!env) +++ return ERR_PTR(-ENOMEM); +++ +++ log = &env->log; +++ if (log_level || log_ubuf || log_size) { +++ /* user requested verbose verifier output +++ * and supplied buffer to store the verification trace +++ */ +++ log->level = log_level; +++ log->ubuf = log_ubuf; +++ log->len_total = log_size; +++ +++ /* log attributes have to be sane */ +++ if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 || +++ !log->level || !log->ubuf) { +++ err = -EINVAL; +++ goto errout; +++ } +++ } +++ +++ btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN); +++ if (!btf) { +++ err = -ENOMEM; +++ goto errout; +++ } +++ env->btf = btf; +++ +++ data = kmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN); +++ if (!data) { +++ err = -ENOMEM; +++ goto errout; +++ } +++ +++ btf->data = data; +++ btf->data_size = btf_data_size; +++ +++ if (copy_from_user(data, btf_data, btf_data_size)) { +++ err = -EFAULT; +++ goto errout; +++ } +++ +++ err = btf_parse_hdr(env); +++ if (err) +++ goto errout; +++ +++ btf->nohdr_data = btf->data + btf->hdr.hdr_len; +++ +++ err = btf_parse_str_sec(env); +++ if (err) +++ goto errout; +++ +++ err = btf_parse_type_sec(env); +++ if (err) +++ goto errout; +++ +++ if (log->level && bpf_verifier_log_full(log)) { +++ err = -ENOSPC; +++ goto errout; +++ } +++ +++ btf_verifier_env_free(env); +++ refcount_set(&btf->refcnt, 1); +++ return btf; +++ +++errout: +++ btf_verifier_env_free(env); +++ if (btf) +++ btf_free(btf); +++ return ERR_PTR(err); +++} +++ +++void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, +++ struct seq_file *m) +++{ +++ const struct btf_type *t = btf_type_by_id(btf, type_id); +++ +++ btf_type_ops(t)->seq_show(btf, t, type_id, obj, 0, m); +++} +++ +++#ifdef CONFIG_PROC_FS +++static void bpf_btf_show_fdinfo(struct seq_file *m, struct file *filp) +++{ +++ const struct btf *btf = filp->private_data; +++ +++ seq_printf(m, "btf_id:\t%u\n", btf->id); +++} +++#endif +++ +++static int btf_release(struct inode *inode, struct file *filp) +++{ +++ btf_put(filp->private_data); +++ return 0; +++} +++ +++const struct file_operations btf_fops = { +++#ifdef CONFIG_PROC_FS +++ .show_fdinfo = bpf_btf_show_fdinfo, +++#endif +++ .release = btf_release, +++}; +++ +++static int __btf_new_fd(struct btf *btf) +++{ +++ return anon_inode_getfd("btf", &btf_fops, btf, O_RDONLY | O_CLOEXEC); +++} +++ +++int btf_new_fd(const union bpf_attr *attr) +++{ +++ struct btf *btf; +++ int ret; +++ +++ btf = btf_parse(u64_to_user_ptr(attr->btf), +++ attr->btf_size, attr->btf_log_level, +++ u64_to_user_ptr(attr->btf_log_buf), +++ attr->btf_log_size); +++ if (IS_ERR(btf)) +++ return PTR_ERR(btf); +++ +++ ret = btf_alloc_id(btf); +++ if (ret) { +++ btf_free(btf); +++ return ret; +++ } +++ +++ /* +++ * The BTF ID is published to the userspace. +++ * All BTF free must go through call_rcu() from +++ * now on (i.e. free by calling btf_put()). 
+++ */ +++ +++ ret = __btf_new_fd(btf); +++ if (ret < 0) +++ btf_put(btf); +++ +++ return ret; +++} +++ +++struct btf *btf_get_by_fd(int fd) +++{ +++ struct btf *btf; +++ struct fd f; +++ +++ f = fdget(fd); +++ +++ if (!f.file) +++ return ERR_PTR(-EBADF); +++ +++ if (f.file->f_op != &btf_fops) { +++ fdput(f); +++ return ERR_PTR(-EINVAL); +++ } +++ +++ btf = f.file->private_data; +++ refcount_inc(&btf->refcnt); +++ fdput(f); +++ +++ return btf; +++} +++ +++int btf_get_info_by_fd(const struct btf *btf, +++ const union bpf_attr *attr, +++ union bpf_attr __user *uattr) +++{ +++ struct bpf_btf_info __user *uinfo; +++ struct bpf_btf_info info; +++ u32 info_copy, btf_copy; +++ void __user *ubtf; +++ u32 uinfo_len; +++ +++ uinfo = u64_to_user_ptr(attr->info.info); +++ uinfo_len = attr->info.info_len; +++ +++ info_copy = min_t(u32, uinfo_len, sizeof(info)); +++ memset(&info, 0, sizeof(info)); +++ if (copy_from_user(&info, uinfo, info_copy)) +++ return -EFAULT; +++ +++ info.id = btf->id; +++ ubtf = u64_to_user_ptr(info.btf); +++ btf_copy = min_t(u32, btf->data_size, info.btf_size); +++ if (copy_to_user(ubtf, btf->data, btf_copy)) +++ return -EFAULT; +++ info.btf_size = btf->data_size; +++ +++ if (copy_to_user(uinfo, &info, info_copy) || +++ put_user(info_copy, &uattr->info.info_len)) +++ return -EFAULT; +++ +++ return 0; +++} +++ +++int btf_get_fd_by_id(u32 id) +++{ +++ struct btf *btf; +++ int fd; +++ +++ rcu_read_lock(); +++ btf = idr_find(&btf_idr, id); +++ if (!btf || !refcount_inc_not_zero(&btf->refcnt)) +++ btf = ERR_PTR(-ENOENT); +++ rcu_read_unlock(); +++ +++ if (IS_ERR(btf)) +++ return PTR_ERR(btf); +++ +++ fd = __btf_new_fd(btf); +++ if (fd < 0) +++ btf_put(btf); +++ +++ return fd; +++} +++ +++u32 btf_id(const struct btf *btf) +++{ +++ return btf->id; +++} ++--- /dev/null +++++ b/kernel/bpf/cgroup.c ++@@ -0,0 +1,1581 @@ +++// SPDX-License-Identifier: GPL-2.0-only +++/* +++ * Functions to manage eBPF programs attached to cgroups +++ * +++ * Copyright (c) 2016 Daniel Mack +++ */ +++ +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++ +++#include "../cgroup/cgroup-internal.h" +++ +++DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key); +++EXPORT_SYMBOL(cgroup_bpf_enabled_key); +++ +++void cgroup_bpf_offline(struct cgroup *cgrp) +++{ +++ cgroup_get(cgrp); +++ percpu_ref_kill(&cgrp->bpf.refcnt); +++} +++ +++/** +++ * cgroup_bpf_release() - put references of all bpf programs and +++ * release all cgroup bpf data +++ * @work: work structure embedded into the cgroup to modify +++ */ +++static void cgroup_bpf_release(struct work_struct *work) +++{ +++ struct cgroup *p, *cgrp = container_of(work, struct cgroup, +++ bpf.release_work); +++ enum bpf_cgroup_storage_type stype; +++ struct bpf_prog_array *old_array; +++ unsigned int type; +++ +++ mutex_lock(&cgroup_mutex); +++ +++ for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) { +++ struct list_head *progs = &cgrp->bpf.progs[type]; +++ struct bpf_prog_list *pl, *tmp; +++ +++ list_for_each_entry_safe(pl, tmp, progs, node) { +++ list_del(&pl->node); +++ bpf_prog_put(pl->prog); +++ for_each_cgroup_storage_type(stype) { +++ bpf_cgroup_storage_unlink(pl->storage[stype]); +++ bpf_cgroup_storage_free(pl->storage[stype]); +++ } +++ kfree(pl); +++ static_branch_dec(&cgroup_bpf_enabled_key); +++ } +++ old_array = rcu_dereference_protected( +++ cgrp->bpf.effective[type], +++ lockdep_is_held(&cgroup_mutex)); +++ bpf_prog_array_free(old_array); +++ } +++ +++ 
mutex_unlock(&cgroup_mutex); +++ +++ for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p)) +++ cgroup_bpf_put(p); +++ +++ percpu_ref_exit(&cgrp->bpf.refcnt); +++ cgroup_put(cgrp); +++} +++ +++/** +++ * cgroup_bpf_release_fn() - callback used to schedule releasing +++ * of bpf cgroup data +++ * @ref: percpu ref counter structure +++ */ +++static void cgroup_bpf_release_fn(struct percpu_ref *ref) +++{ +++ struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt); +++ +++ INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release); +++ queue_work(system_wq, &cgrp->bpf.release_work); +++} +++ +++/* count number of elements in the list. +++ * it's slow but the list cannot be long +++ */ +++static u32 prog_list_length(struct list_head *head) +++{ +++ struct bpf_prog_list *pl; +++ u32 cnt = 0; +++ +++ list_for_each_entry(pl, head, node) { +++ if (!pl->prog) +++ continue; +++ cnt++; +++ } +++ return cnt; +++} +++ +++/* if parent has non-overridable prog attached, +++ * disallow attaching new programs to the descendent cgroup. +++ * if parent has overridable or multi-prog, allow attaching +++ */ +++static bool hierarchy_allows_attach(struct cgroup *cgrp, +++ enum bpf_attach_type type, +++ u32 new_flags) +++{ +++ struct cgroup *p; +++ +++ p = cgroup_parent(cgrp); +++ if (!p) +++ return true; +++ do { +++ u32 flags = p->bpf.flags[type]; +++ u32 cnt; +++ +++ if (flags & BPF_F_ALLOW_MULTI) +++ return true; +++ cnt = prog_list_length(&p->bpf.progs[type]); +++ WARN_ON_ONCE(cnt > 1); +++ if (cnt == 1) +++ return !!(flags & BPF_F_ALLOW_OVERRIDE); +++ p = cgroup_parent(p); +++ } while (p); +++ return true; +++} +++ +++/* compute a chain of effective programs for a given cgroup: +++ * start from the list of programs in this cgroup and add +++ * all parent programs. 
+++ * Note that parent's F_ALLOW_OVERRIDE-type program is yielding +++ * to programs in this cgroup +++ */ +++static int compute_effective_progs(struct cgroup *cgrp, +++ enum bpf_attach_type type, +++ struct bpf_prog_array **array) +++{ +++ enum bpf_cgroup_storage_type stype; +++ struct bpf_prog_array *progs; +++ struct bpf_prog_list *pl; +++ struct cgroup *p = cgrp; +++ int cnt = 0; +++ +++ /* count number of effective programs by walking parents */ +++ do { +++ if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) +++ cnt += prog_list_length(&p->bpf.progs[type]); +++ p = cgroup_parent(p); +++ } while (p); +++ +++ progs = bpf_prog_array_alloc(cnt, GFP_KERNEL); +++ if (!progs) +++ return -ENOMEM; +++ +++ /* populate the array with effective progs */ +++ cnt = 0; +++ p = cgrp; +++ do { +++ if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) +++ continue; +++ +++ list_for_each_entry(pl, &p->bpf.progs[type], node) { +++ if (!pl->prog) +++ continue; +++ +++ progs->items[cnt].prog = pl->prog; +++ for_each_cgroup_storage_type(stype) +++ progs->items[cnt].cgroup_storage[stype] = +++ pl->storage[stype]; +++ cnt++; +++ } +++ } while ((p = cgroup_parent(p))); +++ +++ *array = progs; +++ return 0; +++} +++ +++static void activate_effective_progs(struct cgroup *cgrp, +++ enum bpf_attach_type type, +++ struct bpf_prog_array *old_array) +++{ +++ rcu_swap_protected(cgrp->bpf.effective[type], old_array, +++ lockdep_is_held(&cgroup_mutex)); +++ /* free prog array after grace period, since __cgroup_bpf_run_*() +++ * might be still walking the array +++ */ +++ bpf_prog_array_free(old_array); +++} +++ +++/** +++ * cgroup_bpf_inherit() - inherit effective programs from parent +++ * @cgrp: the cgroup to modify +++ */ +++int cgroup_bpf_inherit(struct cgroup *cgrp) +++{ +++/* has to use marco instead of const int, since compiler thinks +++ * that array below is variable length +++ */ +++#define NR ARRAY_SIZE(cgrp->bpf.effective) +++ struct bpf_prog_array *arrays[NR] = {}; +++ struct cgroup *p; +++ int ret, i; +++ +++ ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0, +++ GFP_KERNEL); +++ if (ret) +++ return ret; +++ +++ for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p)) +++ cgroup_bpf_get(p); +++ +++ for (i = 0; i < NR; i++) +++ INIT_LIST_HEAD(&cgrp->bpf.progs[i]); +++ +++ for (i = 0; i < NR; i++) +++ if (compute_effective_progs(cgrp, i, &arrays[i])) +++ goto cleanup; +++ +++ for (i = 0; i < NR; i++) +++ activate_effective_progs(cgrp, i, arrays[i]); +++ +++ return 0; +++cleanup: +++ for (i = 0; i < NR; i++) +++ bpf_prog_array_free(arrays[i]); +++ +++ for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p)) +++ cgroup_bpf_put(p); +++ +++ percpu_ref_exit(&cgrp->bpf.refcnt); +++ +++ return -ENOMEM; +++} +++ +++static int update_effective_progs(struct cgroup *cgrp, +++ enum bpf_attach_type type) +++{ +++ struct cgroup_subsys_state *css; +++ int err; +++ +++ /* allocate and recompute effective prog arrays */ +++ css_for_each_descendant_pre(css, &cgrp->self) { +++ struct cgroup *desc = container_of(css, struct cgroup, self); +++ +++ if (percpu_ref_is_zero(&desc->bpf.refcnt)) +++ continue; +++ +++ err = compute_effective_progs(desc, type, &desc->bpf.inactive); +++ if (err) +++ goto cleanup; +++ } +++ +++ /* all allocations were successful. 
Activate all prog arrays */ +++ css_for_each_descendant_pre(css, &cgrp->self) { +++ struct cgroup *desc = container_of(css, struct cgroup, self); +++ +++ if (percpu_ref_is_zero(&desc->bpf.refcnt)) { +++ if (unlikely(desc->bpf.inactive)) { +++ bpf_prog_array_free(desc->bpf.inactive); +++ desc->bpf.inactive = NULL; +++ } +++ continue; +++ } +++ +++ activate_effective_progs(desc, type, desc->bpf.inactive); +++ desc->bpf.inactive = NULL; +++ } +++ +++ return 0; +++ +++cleanup: +++ /* oom while computing effective. Free all computed effective arrays +++ * since they were not activated +++ */ +++ css_for_each_descendant_pre(css, &cgrp->self) { +++ struct cgroup *desc = container_of(css, struct cgroup, self); +++ +++ bpf_prog_array_free(desc->bpf.inactive); +++ desc->bpf.inactive = NULL; +++ } +++ +++ return err; +++} +++ +++#define BPF_CGROUP_MAX_PROGS 64 +++ +++/** +++ * __cgroup_bpf_attach() - Attach the program to a cgroup, and +++ * propagate the change to descendants +++ * @cgrp: The cgroup which descendants to traverse +++ * @prog: A program to attach +++ * @type: Type of attach operation +++ * @flags: Option flags +++ * +++ * Must be called with cgroup_mutex held. +++ */ +++int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, +++ enum bpf_attach_type type, u32 flags) +++{ +++ struct list_head *progs = &cgrp->bpf.progs[type]; +++ struct bpf_prog *old_prog = NULL; +++ struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; +++ struct bpf_cgroup_storage *old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; +++ enum bpf_cgroup_storage_type stype; +++ struct bpf_prog_list *pl; +++ bool pl_was_allocated; +++ int err; +++ +++ if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) +++ /* invalid combination */ +++ return -EINVAL; +++ +++ if (!hierarchy_allows_attach(cgrp, type, flags)) +++ return -EPERM; +++ +++ if (!list_empty(progs) && cgrp->bpf.flags[type] != flags) +++ /* Disallow attaching non-overridable on top +++ * of existing overridable in this cgroup. 
+++ * Disallow attaching multi-prog if overridable or none +++ */ +++ return -EPERM; +++ +++ if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) +++ return -E2BIG; +++ +++ for_each_cgroup_storage_type(stype) { +++ storage[stype] = bpf_cgroup_storage_alloc(prog, stype); +++ if (IS_ERR(storage[stype])) { +++ storage[stype] = NULL; +++ for_each_cgroup_storage_type(stype) +++ bpf_cgroup_storage_free(storage[stype]); +++ return -ENOMEM; +++ } +++ } +++ +++ if (flags & BPF_F_ALLOW_MULTI) { +++ list_for_each_entry(pl, progs, node) { +++ if (pl->prog == prog) { +++ /* disallow attaching the same prog twice */ +++ for_each_cgroup_storage_type(stype) +++ bpf_cgroup_storage_free(storage[stype]); +++ return -EINVAL; +++ } +++ } +++ +++ pl = kmalloc(sizeof(*pl), GFP_KERNEL); +++ if (!pl) { +++ for_each_cgroup_storage_type(stype) +++ bpf_cgroup_storage_free(storage[stype]); +++ return -ENOMEM; +++ } +++ +++ pl_was_allocated = true; +++ pl->prog = prog; +++ for_each_cgroup_storage_type(stype) +++ pl->storage[stype] = storage[stype]; +++ list_add_tail(&pl->node, progs); +++ } else { +++ if (list_empty(progs)) { +++ pl = kmalloc(sizeof(*pl), GFP_KERNEL); +++ if (!pl) { +++ for_each_cgroup_storage_type(stype) +++ bpf_cgroup_storage_free(storage[stype]); +++ return -ENOMEM; +++ } +++ pl_was_allocated = true; +++ list_add_tail(&pl->node, progs); +++ } else { +++ pl = list_first_entry(progs, typeof(*pl), node); +++ old_prog = pl->prog; +++ for_each_cgroup_storage_type(stype) { +++ old_storage[stype] = pl->storage[stype]; +++ bpf_cgroup_storage_unlink(old_storage[stype]); +++ } +++ pl_was_allocated = false; +++ } +++ pl->prog = prog; +++ for_each_cgroup_storage_type(stype) +++ pl->storage[stype] = storage[stype]; +++ } +++ +++ cgrp->bpf.flags[type] = flags; +++ +++ err = update_effective_progs(cgrp, type); +++ if (err) +++ goto cleanup; +++ +++ static_branch_inc(&cgroup_bpf_enabled_key); +++ for_each_cgroup_storage_type(stype) { +++ if (!old_storage[stype]) +++ continue; +++ bpf_cgroup_storage_free(old_storage[stype]); +++ } +++ if (old_prog) { +++ bpf_prog_put(old_prog); +++ static_branch_dec(&cgroup_bpf_enabled_key); +++ } +++ for_each_cgroup_storage_type(stype) +++ bpf_cgroup_storage_link(storage[stype], cgrp, type); +++ return 0; +++ +++cleanup: +++ /* and cleanup the prog list */ +++ pl->prog = old_prog; +++ for_each_cgroup_storage_type(stype) { +++ bpf_cgroup_storage_free(pl->storage[stype]); +++ pl->storage[stype] = old_storage[stype]; +++ bpf_cgroup_storage_link(old_storage[stype], cgrp, type); +++ } +++ if (pl_was_allocated) { +++ list_del(&pl->node); +++ kfree(pl); +++ } +++ return err; +++} +++ +++/** +++ * __cgroup_bpf_detach() - Detach the program from a cgroup, and +++ * propagate the change to descendants +++ * @cgrp: The cgroup which descendants to traverse +++ * @prog: A program to detach or NULL +++ * @type: Type of detach operation +++ * +++ * Must be called with cgroup_mutex held. 
+++ */ +++int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, +++ enum bpf_attach_type type) +++{ +++ struct list_head *progs = &cgrp->bpf.progs[type]; +++ enum bpf_cgroup_storage_type stype; +++ u32 flags = cgrp->bpf.flags[type]; +++ struct bpf_prog *old_prog = NULL; +++ struct bpf_prog_list *pl; +++ int err; +++ +++ if (flags & BPF_F_ALLOW_MULTI) { +++ if (!prog) +++ /* to detach MULTI prog the user has to specify valid FD +++ * of the program to be detached +++ */ +++ return -EINVAL; +++ } else { +++ if (list_empty(progs)) +++ /* report error when trying to detach and nothing is attached */ +++ return -ENOENT; +++ } +++ +++ if (flags & BPF_F_ALLOW_MULTI) { +++ /* find the prog and detach it */ +++ list_for_each_entry(pl, progs, node) { +++ if (pl->prog != prog) +++ continue; +++ old_prog = prog; +++ /* mark it deleted, so it's ignored while +++ * recomputing effective +++ */ +++ pl->prog = NULL; +++ break; +++ } +++ if (!old_prog) +++ return -ENOENT; +++ } else { +++ /* to maintain backward compatibility NONE and OVERRIDE cgroups +++ * allow detaching with invalid FD (prog==NULL) +++ */ +++ pl = list_first_entry(progs, typeof(*pl), node); +++ old_prog = pl->prog; +++ pl->prog = NULL; +++ } +++ +++ err = update_effective_progs(cgrp, type); +++ if (err) +++ goto cleanup; +++ +++ /* now can actually delete it from this cgroup list */ +++ list_del(&pl->node); +++ for_each_cgroup_storage_type(stype) { +++ bpf_cgroup_storage_unlink(pl->storage[stype]); +++ bpf_cgroup_storage_free(pl->storage[stype]); +++ } +++ kfree(pl); +++ if (list_empty(progs)) +++ /* last program was detached, reset flags to zero */ +++ cgrp->bpf.flags[type] = 0; +++ +++ bpf_prog_put(old_prog); +++ static_branch_dec(&cgroup_bpf_enabled_key); +++ return 0; +++ +++cleanup: +++ /* and restore back old_prog */ +++ pl->prog = old_prog; +++ return err; +++} +++ +++/* Must be called with cgroup_mutex held to avoid races. 
*/ +++int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, +++ union bpf_attr __user *uattr) +++{ +++ __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); +++ enum bpf_attach_type type = attr->query.attach_type; +++ struct list_head *progs = &cgrp->bpf.progs[type]; +++ u32 flags = cgrp->bpf.flags[type]; +++ struct bpf_prog_array *effective; +++ int cnt, ret = 0, i; +++ +++ effective = rcu_dereference_protected(cgrp->bpf.effective[type], +++ lockdep_is_held(&cgroup_mutex)); +++ +++ if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) +++ cnt = bpf_prog_array_length(effective); +++ else +++ cnt = prog_list_length(progs); +++ +++ if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) +++ return -EFAULT; +++ if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt))) +++ return -EFAULT; +++ if (attr->query.prog_cnt == 0 || !prog_ids || !cnt) +++ /* return early if user requested only program count + flags */ +++ return 0; +++ if (attr->query.prog_cnt < cnt) { +++ cnt = attr->query.prog_cnt; +++ ret = -ENOSPC; +++ } +++ +++ if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) { +++ return bpf_prog_array_copy_to_user(effective, prog_ids, cnt); +++ } else { +++ struct bpf_prog_list *pl; +++ u32 id; +++ +++ i = 0; +++ list_for_each_entry(pl, progs, node) { +++ id = pl->prog->aux->id; +++ if (copy_to_user(prog_ids + i, &id, sizeof(id))) +++ return -EFAULT; +++ if (++i == cnt) +++ break; +++ } +++ } +++ return ret; +++} +++ +++int cgroup_bpf_prog_attach(const union bpf_attr *attr, +++ enum bpf_prog_type ptype, struct bpf_prog *prog) +++{ +++ struct cgroup *cgrp; +++ int ret; +++ +++ cgrp = cgroup_get_from_fd(attr->target_fd); +++ if (IS_ERR(cgrp)) +++ return PTR_ERR(cgrp); +++ +++ ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type, +++ attr->attach_flags); +++ cgroup_put(cgrp); +++ return ret; +++} +++ +++int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) +++{ +++ struct bpf_prog *prog; +++ struct cgroup *cgrp; +++ int ret; +++ +++ cgrp = cgroup_get_from_fd(attr->target_fd); +++ if (IS_ERR(cgrp)) +++ return PTR_ERR(cgrp); +++ +++ prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); +++ if (IS_ERR(prog)) +++ prog = NULL; +++ +++ ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0); +++ if (prog) +++ bpf_prog_put(prog); +++ +++ cgroup_put(cgrp); +++ return ret; +++} +++ +++int cgroup_bpf_prog_query(const union bpf_attr *attr, +++ union bpf_attr __user *uattr) +++{ +++ struct cgroup *cgrp; +++ int ret; +++ +++ cgrp = cgroup_get_from_fd(attr->query.target_fd); +++ if (IS_ERR(cgrp)) +++ return PTR_ERR(cgrp); +++ +++ ret = cgroup_bpf_query(cgrp, attr, uattr); +++ +++ cgroup_put(cgrp); +++ return ret; +++} +++ +++/** +++ * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering +++ * @sk: The socket sending or receiving traffic +++ * @skb: The skb that is being sent or received +++ * @type: The type of program to be exectuted +++ * +++ * If no socket is passed, or the socket is not of type INET or INET6, +++ * this function does nothing and returns 0. +++ * +++ * The program type passed in via @type must be suitable for network +++ * filtering. No further check is performed to assert that. 
+++ * +++ * For egress packets, this function can return: +++ * NET_XMIT_SUCCESS (0) - continue with packet output +++ * NET_XMIT_DROP (1) - drop packet and notify TCP to call cwr +++ * NET_XMIT_CN (2) - continue with packet output and notify TCP +++ * to call cwr +++ * -EPERM - drop packet +++ * +++ * For ingress packets, this function will return -EPERM if any +++ * attached program was found and if it returned != 1 during execution. +++ * Otherwise 0 is returned. +++ */ +++int __cgroup_bpf_run_filter_skb(struct sock *sk, +++ struct sk_buff *skb, +++ enum bpf_attach_type type) +++{ +++ unsigned int offset = skb->data - skb_network_header(skb); +++ struct sock *save_sk; +++ void *saved_data_end; +++ struct cgroup *cgrp; +++ int ret; +++ +++ if (!sk || !sk_fullsock(sk)) +++ return 0; +++ +++ if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) +++ return 0; +++ +++ cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); +++ save_sk = skb->sk; +++ skb->sk = sk; +++ __skb_push(skb, offset); +++ +++ /* compute pointers for the bpf prog */ +++ bpf_compute_and_save_data_end(skb, &saved_data_end); +++ +++ if (type == BPF_CGROUP_INET_EGRESS) { +++ ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY( +++ cgrp->bpf.effective[type], skb, __bpf_prog_run_save_cb); +++ } else { +++ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, +++ __bpf_prog_run_save_cb); +++ ret = (ret == 1 ? 0 : -EPERM); +++ } +++ bpf_restore_data_end(skb, saved_data_end); +++ __skb_pull(skb, offset); +++ skb->sk = save_sk; +++ +++ return ret; +++} +++EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb); +++ +++/** +++ * __cgroup_bpf_run_filter_sk() - Run a program on a sock +++ * @sk: sock structure to manipulate +++ * @type: The type of program to be exectuted +++ * +++ * socket is passed is expected to be of type INET or INET6. +++ * +++ * The program type passed in via @type must be suitable for sock +++ * filtering. No further check is performed to assert that. +++ * +++ * This function will return %-EPERM if any if an attached program was found +++ * and if it returned != 1 during execution. In all other cases, 0 is returned. +++ */ +++int __cgroup_bpf_run_filter_sk(struct sock *sk, +++ enum bpf_attach_type type) +++{ +++ struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); +++ int ret; +++ +++ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN); +++ return ret == 1 ? 0 : -EPERM; +++} +++EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); +++ +++/** +++ * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and +++ * provided by user sockaddr +++ * @sk: sock struct that will use sockaddr +++ * @uaddr: sockaddr struct provided by user +++ * @type: The type of program to be exectuted +++ * @t_ctx: Pointer to attach type specific context +++ * +++ * socket is expected to be of type INET or INET6. +++ * +++ * This function will return %-EPERM if an attached program is found and +++ * returned value != 1 during execution. In all other cases, 0 is returned. +++ */ +++int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, +++ struct sockaddr *uaddr, +++ enum bpf_attach_type type, +++ void *t_ctx) +++{ +++ struct bpf_sock_addr_kern ctx = { +++ .sk = sk, +++ .uaddr = uaddr, +++ .t_ctx = t_ctx, +++ }; +++ struct sockaddr_storage unspec; +++ struct cgroup *cgrp; +++ int ret; +++ +++ /* Check socket family since not all sockets represent network +++ * endpoint (e.g. AF_UNIX). 
+++ */ +++ if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) +++ return 0; +++ +++ if (!ctx.uaddr) { +++ memset(&unspec, 0, sizeof(unspec)); +++ ctx.uaddr = (struct sockaddr *)&unspec; +++ } +++ +++ cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); +++ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN); +++ +++ return ret == 1 ? 0 : -EPERM; +++} +++EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); +++ +++/** +++ * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock +++ * @sk: socket to get cgroup from +++ * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains +++ * sk with connection information (IP addresses, etc.) May not contain +++ * cgroup info if it is a req sock. +++ * @type: The type of program to be exectuted +++ * +++ * socket passed is expected to be of type INET or INET6. +++ * +++ * The program type passed in via @type must be suitable for sock_ops +++ * filtering. No further check is performed to assert that. +++ * +++ * This function will return %-EPERM if any if an attached program was found +++ * and if it returned != 1 during execution. In all other cases, 0 is returned. +++ */ +++int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, +++ struct bpf_sock_ops_kern *sock_ops, +++ enum bpf_attach_type type) +++{ +++ struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); +++ int ret; +++ +++ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops, +++ BPF_PROG_RUN); +++ return ret == 1 ? 0 : -EPERM; +++} +++EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); +++ +++int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, +++ short access, enum bpf_attach_type type) +++{ +++ struct cgroup *cgrp; +++ struct bpf_cgroup_dev_ctx ctx = { +++ .access_type = (access << 16) | dev_type, +++ .major = major, +++ .minor = minor, +++ }; +++ int allow = 1; +++ +++ rcu_read_lock(); +++ cgrp = task_dfl_cgroup(current); +++ allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, +++ BPF_PROG_RUN); +++ rcu_read_unlock(); +++ +++ return !allow; +++} +++EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission); +++ +++static const struct bpf_func_proto * +++cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +++{ +++ switch (func_id) { +++ case BPF_FUNC_map_lookup_elem: +++ return &bpf_map_lookup_elem_proto; +++ case BPF_FUNC_map_update_elem: +++ return &bpf_map_update_elem_proto; +++ case BPF_FUNC_map_delete_elem: +++ return &bpf_map_delete_elem_proto; +++ case BPF_FUNC_map_push_elem: +++ return &bpf_map_push_elem_proto; +++ case BPF_FUNC_map_pop_elem: +++ return &bpf_map_pop_elem_proto; +++ case BPF_FUNC_map_peek_elem: +++ return &bpf_map_peek_elem_proto; +++ case BPF_FUNC_get_current_uid_gid: +++ return &bpf_get_current_uid_gid_proto; +++ case BPF_FUNC_get_local_storage: +++ return &bpf_get_local_storage_proto; +++ case BPF_FUNC_get_current_cgroup_id: +++ return &bpf_get_current_cgroup_id_proto; +++ case BPF_FUNC_trace_printk: +++ if (capable(CAP_SYS_ADMIN)) +++ return bpf_get_trace_printk_proto(); +++ /* fall through */ +++ default: +++ return NULL; +++ } +++} +++ +++static const struct bpf_func_proto * +++cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +++{ +++ return cgroup_base_func_proto(func_id, prog); +++} +++ +++static bool cgroup_dev_is_valid_access(int off, int size, +++ enum bpf_access_type type, +++ const struct bpf_prog *prog, +++ struct bpf_insn_access_aux *info) +++{ +++ const int size_default = sizeof(__u32); +++ +++ if (type == BPF_WRITE) +++ return false; +++ 
+++ if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx)) +++ return false; +++ /* The verifier guarantees that size > 0. */ +++ if (off % size != 0) +++ return false; +++ +++ switch (off) { +++ case bpf_ctx_range(struct bpf_cgroup_dev_ctx, access_type): +++ bpf_ctx_record_field_size(info, size_default); +++ if (!bpf_ctx_narrow_access_ok(off, size, size_default)) +++ return false; +++ break; +++ default: +++ if (size != size_default) +++ return false; +++ } +++ +++ return true; +++} +++ +++const struct bpf_prog_ops cg_dev_prog_ops = { +++}; +++ +++const struct bpf_verifier_ops cg_dev_verifier_ops = { +++ .get_func_proto = cgroup_dev_func_proto, +++ .is_valid_access = cgroup_dev_is_valid_access, +++}; +++ +++/** +++ * __cgroup_bpf_run_filter_sysctl - Run a program on sysctl +++ * +++ * @head: sysctl table header +++ * @table: sysctl table +++ * @write: sysctl is being read (= 0) or written (= 1) +++ * @buf: pointer to buffer passed by user space +++ * @pcount: value-result argument: value is size of buffer pointed to by @buf, +++ * result is size of @new_buf if program set new value, initial value +++ * otherwise +++ * @ppos: value-result argument: value is position at which read from or write +++ * to sysctl is happening, result is new position if program overrode it, +++ * initial value otherwise +++ * @new_buf: pointer to pointer to new buffer that will be allocated if program +++ * overrides new value provided by user space on sysctl write +++ * NOTE: it's caller responsibility to free *new_buf if it was set +++ * @type: type of program to be executed +++ * +++ * Program is run when sysctl is being accessed, either read or written, and +++ * can allow or deny such access. +++ * +++ * This function will return %-EPERM if an attached program is found and +++ * returned value != 1 during execution. In all other cases 0 is returned. +++ */ +++int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, +++ struct ctl_table *table, int write, +++ void __user *buf, size_t *pcount, +++ loff_t *ppos, void **new_buf, +++ enum bpf_attach_type type) +++{ +++ struct bpf_sysctl_kern ctx = { +++ .head = head, +++ .table = table, +++ .write = write, +++ .ppos = ppos, +++ .cur_val = NULL, +++ .cur_len = PAGE_SIZE, +++ .new_val = NULL, +++ .new_len = 0, +++ .new_updated = 0, +++ }; +++ struct cgroup *cgrp; +++ int ret; +++ +++ ctx.cur_val = kmalloc_track_caller(ctx.cur_len, GFP_KERNEL); +++ if (ctx.cur_val) { +++ mm_segment_t old_fs; +++ loff_t pos = 0; +++ +++ old_fs = get_fs(); +++ set_fs(KERNEL_DS); +++ if (table->proc_handler(table, 0, (void __user *)ctx.cur_val, +++ &ctx.cur_len, &pos)) { +++ /* Let BPF program decide how to proceed. */ +++ ctx.cur_len = 0; +++ } +++ set_fs(old_fs); +++ } else { +++ /* Let BPF program decide how to proceed. */ +++ ctx.cur_len = 0; +++ } +++ +++ if (write && buf && *pcount) { +++ /* BPF program should be able to override new value with a +++ * buffer bigger than provided by user. +++ */ +++ ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL); +++ ctx.new_len = min_t(size_t, PAGE_SIZE, *pcount); +++ if (!ctx.new_val || +++ copy_from_user(ctx.new_val, buf, ctx.new_len)) +++ /* Let BPF program decide how to proceed. 
*/ +++ ctx.new_len = 0; +++ } +++ +++ rcu_read_lock(); +++ cgrp = task_dfl_cgroup(current); +++ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN); +++ rcu_read_unlock(); +++ +++ kfree(ctx.cur_val); +++ +++ if (ret == 1 && ctx.new_updated) { +++ *new_buf = ctx.new_val; +++ *pcount = ctx.new_len; +++ } else { +++ kfree(ctx.new_val); +++ } +++ +++ return ret == 1 ? 0 : -EPERM; +++} +++EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl); +++ +++#ifdef CONFIG_NET +++static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp, +++ enum bpf_attach_type attach_type) +++{ +++ struct bpf_prog_array *prog_array; +++ bool empty; +++ +++ rcu_read_lock(); +++ prog_array = rcu_dereference(cgrp->bpf.effective[attach_type]); +++ empty = bpf_prog_array_is_empty(prog_array); +++ rcu_read_unlock(); +++ +++ return empty; +++} +++ +++static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen) +++{ +++ if (unlikely(max_optlen < 0)) +++ return -EINVAL; +++ +++ if (unlikely(max_optlen > PAGE_SIZE)) { +++ /* We don't expose optvals that are greater than PAGE_SIZE +++ * to the BPF program. +++ */ +++ max_optlen = PAGE_SIZE; +++ } +++ +++ ctx->optval = kzalloc(max_optlen, GFP_USER); +++ if (!ctx->optval) +++ return -ENOMEM; +++ +++ ctx->optval_end = ctx->optval + max_optlen; +++ +++ return max_optlen; +++} +++ +++static void sockopt_free_buf(struct bpf_sockopt_kern *ctx) +++{ +++ kfree(ctx->optval); +++} +++ +++int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, +++ int *optname, char __user *optval, +++ int *optlen, char **kernel_optval) +++{ +++ struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); +++ struct bpf_sockopt_kern ctx = { +++ .sk = sk, +++ .level = *level, +++ .optname = *optname, +++ }; +++ int ret, max_optlen; +++ +++ /* Opportunistic check to see whether we have any BPF program +++ * attached to the hook so we don't waste time allocating +++ * memory and locking the socket. +++ */ +++ if (!cgroup_bpf_enabled || +++ __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_SETSOCKOPT)) +++ return 0; +++ +++ /* Allocate a bit more than the initial user buffer for +++ * BPF program. The canonical use case is overriding +++ * TCP_CONGESTION(nv) to TCP_CONGESTION(cubic). +++ */ +++ max_optlen = max_t(int, 16, *optlen); +++ +++ max_optlen = sockopt_alloc_buf(&ctx, max_optlen); +++ if (max_optlen < 0) +++ return max_optlen; +++ +++ ctx.optlen = *optlen; +++ +++ if (copy_from_user(ctx.optval, optval, min(*optlen, max_optlen)) != 0) { +++ ret = -EFAULT; +++ goto out; +++ } +++ +++ lock_sock(sk); +++ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_SETSOCKOPT], +++ &ctx, BPF_PROG_RUN); +++ release_sock(sk); +++ +++ if (!ret) { +++ ret = -EPERM; +++ goto out; +++ } +++ +++ if (ctx.optlen == -1) { +++ /* optlen set to -1, bypass kernel */ +++ ret = 1; +++ } else if (ctx.optlen > max_optlen || ctx.optlen < -1) { +++ /* optlen is out of bounds */ +++ ret = -EFAULT; +++ } else { +++ /* optlen within bounds, run kernel handler */ +++ ret = 0; +++ +++ /* export any potential modifications */ +++ *level = ctx.level; +++ *optname = ctx.optname; +++ +++ /* optlen == 0 from BPF indicates that we should +++ * use original userspace data. 
+++ */ +++ if (ctx.optlen != 0) { +++ *optlen = ctx.optlen; +++ *kernel_optval = ctx.optval; +++ /* export and don't free sockopt buf */ +++ return 0; +++ } +++ } +++ +++out: +++ sockopt_free_buf(&ctx); +++ return ret; +++} +++EXPORT_SYMBOL(__cgroup_bpf_run_filter_setsockopt); +++ +++int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, +++ int optname, char __user *optval, +++ int __user *optlen, int max_optlen, +++ int retval) +++{ +++ struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); +++ struct bpf_sockopt_kern ctx = { +++ .sk = sk, +++ .level = level, +++ .optname = optname, +++ .retval = retval, +++ }; +++ int ret; +++ +++ /* Opportunistic check to see whether we have any BPF program +++ * attached to the hook so we don't waste time allocating +++ * memory and locking the socket. +++ */ +++ if (!cgroup_bpf_enabled || +++ __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT)) +++ return retval; +++ +++ ctx.optlen = max_optlen; +++ +++ max_optlen = sockopt_alloc_buf(&ctx, max_optlen); +++ if (max_optlen < 0) +++ return max_optlen; +++ +++ if (!retval) { +++ /* If kernel getsockopt finished successfully, +++ * copy whatever was returned to the user back +++ * into our temporary buffer. Set optlen to the +++ * one that kernel returned as well to let +++ * BPF programs inspect the value. +++ */ +++ +++ if (get_user(ctx.optlen, optlen)) { +++ ret = -EFAULT; +++ goto out; +++ } +++ +++ if (ctx.optlen < 0) { +++ ret = -EFAULT; +++ goto out; +++ } +++ +++ if (copy_from_user(ctx.optval, optval, +++ min(ctx.optlen, max_optlen)) != 0) { +++ ret = -EFAULT; +++ goto out; +++ } +++ } +++ +++ lock_sock(sk); +++ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT], +++ &ctx, BPF_PROG_RUN); +++ release_sock(sk); +++ +++ if (!ret) { +++ ret = -EPERM; +++ goto out; +++ } +++ +++ if (ctx.optlen > max_optlen || ctx.optlen < 0) { +++ ret = -EFAULT; +++ goto out; +++ } +++ +++ /* BPF programs only allowed to set retval to 0, not some +++ * arbitrary value. +++ */ +++ if (ctx.retval != 0 && ctx.retval != retval) { +++ ret = -EFAULT; +++ goto out; +++ } +++ +++ if (ctx.optlen != 0) { +++ if (copy_to_user(optval, ctx.optval, ctx.optlen) || +++ put_user(ctx.optlen, optlen)) { +++ ret = -EFAULT; +++ goto out; +++ } +++ } +++ +++ ret = ctx.retval; +++ +++out: +++ sockopt_free_buf(&ctx); +++ return ret; +++} +++EXPORT_SYMBOL(__cgroup_bpf_run_filter_getsockopt); +++#endif +++ +++static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp, +++ size_t *lenp) +++{ +++ ssize_t tmp_ret = 0, ret; +++ +++ if (dir->header.parent) { +++ tmp_ret = sysctl_cpy_dir(dir->header.parent, bufp, lenp); +++ if (tmp_ret < 0) +++ return tmp_ret; +++ } +++ +++ ret = strscpy(*bufp, dir->header.ctl_table[0].procname, *lenp); +++ if (ret < 0) +++ return ret; +++ *bufp += ret; +++ *lenp -= ret; +++ ret += tmp_ret; +++ +++ /* Avoid leading slash. 
*/ +++ if (!ret) +++ return ret; +++ +++ tmp_ret = strscpy(*bufp, "/", *lenp); +++ if (tmp_ret < 0) +++ return tmp_ret; +++ *bufp += tmp_ret; +++ *lenp -= tmp_ret; +++ +++ return ret + tmp_ret; +++} +++ +++BPF_CALL_4(bpf_sysctl_get_name, struct bpf_sysctl_kern *, ctx, char *, buf, +++ size_t, buf_len, u64, flags) +++{ +++ ssize_t tmp_ret = 0, ret; +++ +++ if (!buf) +++ return -EINVAL; +++ +++ if (!(flags & BPF_F_SYSCTL_BASE_NAME)) { +++ if (!ctx->head) +++ return -EINVAL; +++ tmp_ret = sysctl_cpy_dir(ctx->head->parent, &buf, &buf_len); +++ if (tmp_ret < 0) +++ return tmp_ret; +++ } +++ +++ ret = strscpy(buf, ctx->table->procname, buf_len); +++ +++ return ret < 0 ? ret : tmp_ret + ret; +++} +++ +++static const struct bpf_func_proto bpf_sysctl_get_name_proto = { +++ .func = bpf_sysctl_get_name, +++ .gpl_only = false, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_PTR_TO_CTX, +++ .arg2_type = ARG_PTR_TO_MEM, +++ .arg3_type = ARG_CONST_SIZE, +++ .arg4_type = ARG_ANYTHING, +++}; +++ +++static int copy_sysctl_value(char *dst, size_t dst_len, char *src, +++ size_t src_len) +++{ +++ if (!dst) +++ return -EINVAL; +++ +++ if (!dst_len) +++ return -E2BIG; +++ +++ if (!src || !src_len) { +++ memset(dst, 0, dst_len); +++ return -EINVAL; +++ } +++ +++ memcpy(dst, src, min(dst_len, src_len)); +++ +++ if (dst_len > src_len) { +++ memset(dst + src_len, '\0', dst_len - src_len); +++ return src_len; +++ } +++ +++ dst[dst_len - 1] = '\0'; +++ +++ return -E2BIG; +++} +++ +++BPF_CALL_3(bpf_sysctl_get_current_value, struct bpf_sysctl_kern *, ctx, +++ char *, buf, size_t, buf_len) +++{ +++ return copy_sysctl_value(buf, buf_len, ctx->cur_val, ctx->cur_len); +++} +++ +++static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = { +++ .func = bpf_sysctl_get_current_value, +++ .gpl_only = false, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_PTR_TO_CTX, +++ .arg2_type = ARG_PTR_TO_UNINIT_MEM, +++ .arg3_type = ARG_CONST_SIZE, +++}; +++ +++BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf, +++ size_t, buf_len) +++{ +++ if (!ctx->write) { +++ if (buf && buf_len) +++ memset(buf, '\0', buf_len); +++ return -EINVAL; +++ } +++ return copy_sysctl_value(buf, buf_len, ctx->new_val, ctx->new_len); +++} +++ +++static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = { +++ .func = bpf_sysctl_get_new_value, +++ .gpl_only = false, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_PTR_TO_CTX, +++ .arg2_type = ARG_PTR_TO_UNINIT_MEM, +++ .arg3_type = ARG_CONST_SIZE, +++}; +++ +++BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx, +++ const char *, buf, size_t, buf_len) +++{ +++ if (!ctx->write || !ctx->new_val || !ctx->new_len || !buf || !buf_len) +++ return -EINVAL; +++ +++ if (buf_len > PAGE_SIZE - 1) +++ return -E2BIG; +++ +++ memcpy(ctx->new_val, buf, buf_len); +++ ctx->new_len = buf_len; +++ ctx->new_updated = 1; +++ +++ return 0; +++} +++ +++static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = { +++ .func = bpf_sysctl_set_new_value, +++ .gpl_only = false, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_PTR_TO_CTX, +++ .arg2_type = ARG_PTR_TO_MEM, +++ .arg3_type = ARG_CONST_SIZE, +++}; +++ +++static const struct bpf_func_proto * +++sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +++{ +++ switch (func_id) { +++ case BPF_FUNC_strtol: +++ return &bpf_strtol_proto; +++ case BPF_FUNC_strtoul: +++ return &bpf_strtoul_proto; +++ case BPF_FUNC_sysctl_get_name: +++ return &bpf_sysctl_get_name_proto; +++ case 
BPF_FUNC_sysctl_get_current_value: +++ return &bpf_sysctl_get_current_value_proto; +++ case BPF_FUNC_sysctl_get_new_value: +++ return &bpf_sysctl_get_new_value_proto; +++ case BPF_FUNC_sysctl_set_new_value: +++ return &bpf_sysctl_set_new_value_proto; +++ default: +++ return cgroup_base_func_proto(func_id, prog); +++ } +++} +++ +++static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type, +++ const struct bpf_prog *prog, +++ struct bpf_insn_access_aux *info) +++{ +++ const int size_default = sizeof(__u32); +++ +++ if (off < 0 || off + size > sizeof(struct bpf_sysctl) || off % size) +++ return false; +++ +++ switch (off) { +++ case bpf_ctx_range(struct bpf_sysctl, write): +++ if (type != BPF_READ) +++ return false; +++ bpf_ctx_record_field_size(info, size_default); +++ return bpf_ctx_narrow_access_ok(off, size, size_default); +++ case bpf_ctx_range(struct bpf_sysctl, file_pos): +++ if (type == BPF_READ) { +++ bpf_ctx_record_field_size(info, size_default); +++ return bpf_ctx_narrow_access_ok(off, size, size_default); +++ } else { +++ return size == size_default; +++ } +++ default: +++ return false; +++ } +++} +++ +++static u32 sysctl_convert_ctx_access(enum bpf_access_type type, +++ const struct bpf_insn *si, +++ struct bpf_insn *insn_buf, +++ struct bpf_prog *prog, u32 *target_size) +++{ +++ struct bpf_insn *insn = insn_buf; +++ u32 read_size; +++ +++ switch (si->off) { +++ case offsetof(struct bpf_sysctl, write): +++ *insn++ = BPF_LDX_MEM( +++ BPF_SIZE(si->code), si->dst_reg, si->src_reg, +++ bpf_target_off(struct bpf_sysctl_kern, write, +++ FIELD_SIZEOF(struct bpf_sysctl_kern, +++ write), +++ target_size)); +++ break; +++ case offsetof(struct bpf_sysctl, file_pos): +++ /* ppos is a pointer so it should be accessed via indirect +++ * loads and stores. Also for stores additional temporary +++ * register is used since neither src_reg nor dst_reg can be +++ * overridden. 
+++ */ +++ if (type == BPF_WRITE) { +++ int treg = BPF_REG_9; +++ +++ if (si->src_reg == treg || si->dst_reg == treg) +++ --treg; +++ if (si->src_reg == treg || si->dst_reg == treg) +++ --treg; +++ *insn++ = BPF_STX_MEM( +++ BPF_DW, si->dst_reg, treg, +++ offsetof(struct bpf_sysctl_kern, tmp_reg)); +++ *insn++ = BPF_LDX_MEM( +++ BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos), +++ treg, si->dst_reg, +++ offsetof(struct bpf_sysctl_kern, ppos)); +++ *insn++ = BPF_STX_MEM( +++ BPF_SIZEOF(u32), treg, si->src_reg, +++ bpf_ctx_narrow_access_offset( +++ 0, sizeof(u32), sizeof(loff_t))); +++ *insn++ = BPF_LDX_MEM( +++ BPF_DW, treg, si->dst_reg, +++ offsetof(struct bpf_sysctl_kern, tmp_reg)); +++ } else { +++ *insn++ = BPF_LDX_MEM( +++ BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos), +++ si->dst_reg, si->src_reg, +++ offsetof(struct bpf_sysctl_kern, ppos)); +++ read_size = bpf_size_to_bytes(BPF_SIZE(si->code)); +++ *insn++ = BPF_LDX_MEM( +++ BPF_SIZE(si->code), si->dst_reg, si->dst_reg, +++ bpf_ctx_narrow_access_offset( +++ 0, read_size, sizeof(loff_t))); +++ } +++ *target_size = sizeof(u32); +++ break; +++ } +++ +++ return insn - insn_buf; +++} +++ +++const struct bpf_verifier_ops cg_sysctl_verifier_ops = { +++ .get_func_proto = sysctl_func_proto, +++ .is_valid_access = sysctl_is_valid_access, +++ .convert_ctx_access = sysctl_convert_ctx_access, +++}; +++ +++const struct bpf_prog_ops cg_sysctl_prog_ops = { +++}; +++ +++static const struct bpf_func_proto * +++cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +++{ +++ switch (func_id) { +++#ifdef CONFIG_NET +++ case BPF_FUNC_sk_storage_get: +++ return &bpf_sk_storage_get_proto; +++ case BPF_FUNC_sk_storage_delete: +++ return &bpf_sk_storage_delete_proto; +++#endif +++#ifdef CONFIG_INET +++ case BPF_FUNC_tcp_sock: +++ return &bpf_tcp_sock_proto; +++#endif +++ default: +++ return cgroup_base_func_proto(func_id, prog); +++ } +++} +++ +++static bool cg_sockopt_is_valid_access(int off, int size, +++ enum bpf_access_type type, +++ const struct bpf_prog *prog, +++ struct bpf_insn_access_aux *info) +++{ +++ const int size_default = sizeof(__u32); +++ +++ if (off < 0 || off >= sizeof(struct bpf_sockopt)) +++ return false; +++ +++ if (off % size != 0) +++ return false; +++ +++ if (type == BPF_WRITE) { +++ switch (off) { +++ case offsetof(struct bpf_sockopt, retval): +++ if (size != size_default) +++ return false; +++ return prog->expected_attach_type == +++ BPF_CGROUP_GETSOCKOPT; +++ case offsetof(struct bpf_sockopt, optname): +++ /* fallthrough */ +++ case offsetof(struct bpf_sockopt, level): +++ if (size != size_default) +++ return false; +++ return prog->expected_attach_type == +++ BPF_CGROUP_SETSOCKOPT; +++ case offsetof(struct bpf_sockopt, optlen): +++ return size == size_default; +++ default: +++ return false; +++ } +++ } +++ +++ switch (off) { +++ case offsetof(struct bpf_sockopt, sk): +++ if (size != sizeof(__u64)) +++ return false; +++ info->reg_type = PTR_TO_SOCKET; +++ break; +++ case offsetof(struct bpf_sockopt, optval): +++ if (size != sizeof(__u64)) +++ return false; +++ info->reg_type = PTR_TO_PACKET; +++ break; +++ case offsetof(struct bpf_sockopt, optval_end): +++ if (size != sizeof(__u64)) +++ return false; +++ info->reg_type = PTR_TO_PACKET_END; +++ break; +++ case offsetof(struct bpf_sockopt, retval): +++ if (size != size_default) +++ return false; +++ return prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT; +++ default: +++ if (size != size_default) +++ return false; +++ break; +++ } +++ return true; +++} +++ 
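[editor's note — reading aid, not part of the backported patch] cg_sockopt_is_valid_access() above is what shapes the programming model of BPF_PROG_TYPE_CGROUP_SOCKOPT programs: optval/optval_end are exposed with packet-pointer semantics (so every dereference must be bounds-checked), retval is writable only at the getsockopt attach point, and level/optname are writable only at setsockopt. A hypothetical restricted-C program for the setsockopt hook is sketched below; it assumes a libbpf-style build environment (SEC() from <bpf/bpf_helpers.h>), the SOL_IP/IP_TOS constants are spelled out to keep it self-contained, and the TOS policy it enforces is purely illustrative.

/* Hypothetical cgroup/setsockopt program: inspect level/optname, bounds-check
 * optval against optval_end before reading it, and allow or reject the call.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#define SOL_IP  0
#define IP_TOS  1

SEC("cgroup/setsockopt")
int restrict_ip_tos(struct bpf_sockopt *ctx)
{
        __u8 *optval = ctx->optval;
        __u8 *optval_end = ctx->optval_end;

        if (ctx->level != SOL_IP || ctx->optname != IP_TOS)
                return 1;       /* not ours: let the kernel handle it */

        if (optval + 1 > optval_end)
                return 0;       /* value not visible to BPF: reject */

        if (*optval & 0x3)      /* illustrative policy: refuse ECN bits in TOS */
                return 0;

        return 1;               /* accepted: kernel setsockopt still runs */
}

char _license[] SEC("license") = "GPL";

Returning 0 is turned into -EPERM by __cgroup_bpf_run_filter_setsockopt() above, while returning 1 (with ctx->optlen left untouched) lets the kernel's own setsockopt handler run on the buffer that was copied in for the program.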
+++#define CG_SOCKOPT_ACCESS_FIELD(T, F) \ +++ T(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F), \ +++ si->dst_reg, si->src_reg, \ +++ offsetof(struct bpf_sockopt_kern, F)) +++ +++static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type, +++ const struct bpf_insn *si, +++ struct bpf_insn *insn_buf, +++ struct bpf_prog *prog, +++ u32 *target_size) +++{ +++ struct bpf_insn *insn = insn_buf; +++ +++ switch (si->off) { +++ case offsetof(struct bpf_sockopt, sk): +++ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, sk); +++ break; +++ case offsetof(struct bpf_sockopt, level): +++ if (type == BPF_WRITE) +++ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, level); +++ else +++ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, level); +++ break; +++ case offsetof(struct bpf_sockopt, optname): +++ if (type == BPF_WRITE) +++ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optname); +++ else +++ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optname); +++ break; +++ case offsetof(struct bpf_sockopt, optlen): +++ if (type == BPF_WRITE) +++ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optlen); +++ else +++ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen); +++ break; +++ case offsetof(struct bpf_sockopt, retval): +++ if (type == BPF_WRITE) +++ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval); +++ else +++ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval); +++ break; +++ case offsetof(struct bpf_sockopt, optval): +++ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval); +++ break; +++ case offsetof(struct bpf_sockopt, optval_end): +++ *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval_end); +++ break; +++ } +++ +++ return insn - insn_buf; +++} +++ +++static int cg_sockopt_get_prologue(struct bpf_insn *insn_buf, +++ bool direct_write, +++ const struct bpf_prog *prog) +++{ +++ /* Nothing to do for sockopt argument. The data is kzalloc'ated. +++ */ +++ return 0; +++} +++ +++const struct bpf_verifier_ops cg_sockopt_verifier_ops = { +++ .get_func_proto = cg_sockopt_func_proto, +++ .is_valid_access = cg_sockopt_is_valid_access, +++ .convert_ctx_access = cg_sockopt_convert_ctx_access, +++ .gen_prologue = cg_sockopt_get_prologue, +++}; +++ +++const struct bpf_prog_ops cg_sockopt_prog_ops = { +++}; ++--- a/kernel/bpf/core.c +++++ b/kernel/bpf/core.c ++@@ -1,3 +1,4 @@ +++// SPDX-License-Identifier: GPL-2.0-or-later ++ /* ++ * Linux Socket Filter - Kernel level socket filtering ++ * ++@@ -12,21 +13,22 @@ ++ * Alexei Starovoitov ++ * Daniel Borkmann ++ * ++- * This program is free software; you can redistribute it and/or ++- * modify it under the terms of the GNU General Public License ++- * as published by the Free Software Foundation; either version ++- * 2 of the License, or (at your option) any later version. ++- * ++ * Andi Kleen - Fix a few bad bugs and races. 
++ * Kris Katterjohn - Added many additional checks in bpf_check_classic() ++ */ ++ +++#include ++ #include ++ #include ++ #include ++ #include ++ #include ++ #include +++#include +++#include +++#include +++#include +++#include ++ ++ #include ++ ++@@ -47,6 +49,7 @@ ++ #define DST regs[insn->dst_reg] ++ #define SRC regs[insn->src_reg] ++ #define FP regs[BPF_REG_FP] +++#define AX regs[BPF_REG_AX] ++ #define ARG1 regs[BPF_REG_ARG1] ++ #define CTX regs[BPF_REG_CTX] ++ #define IMM insn->imm ++@@ -70,10 +73,9 @@ void *bpf_internal_load_pointer_neg_help ++ return NULL; ++ } ++ ++-struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) +++struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags) ++ { ++- gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | ++- gfp_extra_flags; +++ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; ++ struct bpf_prog_aux *aux; ++ struct bpf_prog *fp; ++ ++@@ -82,8 +84,6 @@ struct bpf_prog *bpf_prog_alloc(unsigned ++ if (fp == NULL) ++ return NULL; ++ ++- kmemcheck_annotate_bitfield(fp, meta); ++- ++ aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags); ++ if (aux == NULL) { ++ vfree(fp); ++@@ -93,30 +93,151 @@ struct bpf_prog *bpf_prog_alloc(unsigned ++ fp->pages = size / PAGE_SIZE; ++ fp->aux = aux; ++ fp->aux->prog = fp; +++ fp->jit_requested = ebpf_jit_enabled(); +++ +++ INIT_LIST_HEAD_RCU(&fp->aux->ksym_lnode); ++ ++ return fp; ++ } +++ +++struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) +++{ +++ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; +++ struct bpf_prog *prog; +++ int cpu; +++ +++ prog = bpf_prog_alloc_no_stats(size, gfp_extra_flags); +++ if (!prog) +++ return NULL; +++ +++ prog->aux->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags); +++ if (!prog->aux->stats) { +++ kfree(prog->aux); +++ vfree(prog); +++ return NULL; +++ } +++ +++ for_each_possible_cpu(cpu) { +++ struct bpf_prog_stats *pstats; +++ +++ pstats = per_cpu_ptr(prog->aux->stats, cpu); +++ u64_stats_init(&pstats->syncp); +++ } +++ return prog; +++} ++ EXPORT_SYMBOL_GPL(bpf_prog_alloc); ++ +++int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog) +++{ +++ if (!prog->aux->nr_linfo || !prog->jit_requested) +++ return 0; +++ +++ prog->aux->jited_linfo = kcalloc(prog->aux->nr_linfo, +++ sizeof(*prog->aux->jited_linfo), +++ GFP_KERNEL | __GFP_NOWARN); +++ if (!prog->aux->jited_linfo) +++ return -ENOMEM; +++ +++ return 0; +++} +++ +++void bpf_prog_free_jited_linfo(struct bpf_prog *prog) +++{ +++ kfree(prog->aux->jited_linfo); +++ prog->aux->jited_linfo = NULL; +++} +++ +++void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog) +++{ +++ if (prog->aux->jited_linfo && !prog->aux->jited_linfo[0]) +++ bpf_prog_free_jited_linfo(prog); +++} +++ +++/* The jit engine is responsible to provide an array +++ * for insn_off to the jited_off mapping (insn_to_jit_off). +++ * +++ * The idx to this array is the insn_off. Hence, the insn_off +++ * here is relative to the prog itself instead of the main prog. +++ * This array has one entry for each xlated bpf insn. +++ * +++ * jited_off is the byte off to the last byte of the jited insn. +++ * +++ * Hence, with +++ * insn_start: +++ * The first bpf insn off of the prog. The insn off +++ * here is relative to the main prog. +++ * e.g. 
if prog is a subprog, insn_start > 0 +++ * linfo_idx: +++ * The prog's idx to prog->aux->linfo and jited_linfo +++ * +++ * jited_linfo[linfo_idx] = prog->bpf_func +++ * +++ * For i > linfo_idx, +++ * +++ * jited_linfo[i] = prog->bpf_func + +++ * insn_to_jit_off[linfo[i].insn_off - insn_start - 1] +++ */ +++void bpf_prog_fill_jited_linfo(struct bpf_prog *prog, +++ const u32 *insn_to_jit_off) +++{ +++ u32 linfo_idx, insn_start, insn_end, nr_linfo, i; +++ const struct bpf_line_info *linfo; +++ void **jited_linfo; +++ +++ if (!prog->aux->jited_linfo) +++ /* Userspace did not provide linfo */ +++ return; +++ +++ linfo_idx = prog->aux->linfo_idx; +++ linfo = &prog->aux->linfo[linfo_idx]; +++ insn_start = linfo[0].insn_off; +++ insn_end = insn_start + prog->len; +++ +++ jited_linfo = &prog->aux->jited_linfo[linfo_idx]; +++ jited_linfo[0] = prog->bpf_func; +++ +++ nr_linfo = prog->aux->nr_linfo - linfo_idx; +++ +++ for (i = 1; i < nr_linfo && linfo[i].insn_off < insn_end; i++) +++ /* The verifier ensures that linfo[i].insn_off is +++ * strictly increasing +++ */ +++ jited_linfo[i] = prog->bpf_func + +++ insn_to_jit_off[linfo[i].insn_off - insn_start - 1]; +++} +++ +++void bpf_prog_free_linfo(struct bpf_prog *prog) +++{ +++ bpf_prog_free_jited_linfo(prog); +++ kvfree(prog->aux->linfo); +++} +++ ++ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, ++ gfp_t gfp_extra_flags) ++ { ++- gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | ++- gfp_extra_flags; +++ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; ++ struct bpf_prog *fp; +++ u32 pages, delta; +++ int ret; ++ ++ BUG_ON(fp_old == NULL); ++ ++ size = round_up(size, PAGE_SIZE); ++- if (size <= fp_old->pages * PAGE_SIZE) +++ pages = size / PAGE_SIZE; +++ if (pages <= fp_old->pages) ++ return fp_old; ++ ++- fp = __vmalloc(size, gfp_flags, PAGE_KERNEL); ++- if (fp != NULL) { ++- kmemcheck_annotate_bitfield(fp, meta); +++ delta = pages - fp_old->pages; +++ ret = __bpf_prog_charge(fp_old->aux->user, delta); +++ if (ret) +++ return NULL; ++ +++ fp = __vmalloc(size, gfp_flags, PAGE_KERNEL); +++ if (fp == NULL) { +++ __bpf_prog_uncharge(fp_old->aux->user, delta); +++ } else { ++ memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE); ++- fp->pages = size / PAGE_SIZE; +++ fp->pages = pages; ++ fp->aux->prog = fp; ++ ++ /* We keep fp->aux from fp_old around in the new ++@@ -128,40 +249,578 @@ struct bpf_prog *bpf_prog_realloc(struct ++ ++ return fp; ++ } ++-EXPORT_SYMBOL_GPL(bpf_prog_realloc); ++ ++ void __bpf_prog_free(struct bpf_prog *fp) ++ { ++- kfree(fp->aux); +++ if (fp->aux) { +++ free_percpu(fp->aux->stats); +++ kfree(fp->aux); +++ } ++ vfree(fp); ++ } ++-EXPORT_SYMBOL_GPL(__bpf_prog_free); +++ +++int bpf_prog_calc_tag(struct bpf_prog *fp) +++{ +++ const u32 bits_offset = SHA_MESSAGE_BYTES - sizeof(__be64); +++ u32 raw_size = bpf_prog_tag_scratch_size(fp); +++ u32 digest[SHA_DIGEST_WORDS]; +++ u32 ws[SHA_WORKSPACE_WORDS]; +++ u32 i, bsize, psize, blocks; +++ struct bpf_insn *dst; +++ bool was_ld_map; +++ u8 *raw, *todo; +++ __be32 *result; +++ __be64 *bits; +++ +++ raw = vmalloc(raw_size); +++ if (!raw) +++ return -ENOMEM; +++ +++ sha_init(digest); +++ memset(ws, 0, sizeof(ws)); +++ +++ /* We need to take out the map fd for the digest calculation +++ * since they are unstable from user space side. 
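bpf_prog_fill_jited_linfo() above turns the verifier's line info into jited addresses: entry 0 maps to the start of the (sub)prog, and every later entry adds the end offset of the jited insn just before it. A stand-in with plain arrays (all offsets below are made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t prog_image[64];				/* stands in for prog->bpf_func     */
	uint32_t linfo_insn_off[] = { 0, 2, 5 };	/* line-info insn offsets (made up) */
	uint32_t insn_to_jit_off[] = { 4, 8, 16, 20, 28 }; /* end offset of each jited insn */
	void *jited_linfo[3];
	uint32_t insn_start = linfo_insn_off[0];

	jited_linfo[0] = prog_image;			/* first entry: start of the prog   */
	for (int i = 1; i < 3; i++)
		jited_linfo[i] = prog_image +
			insn_to_jit_off[linfo_insn_off[i] - insn_start - 1];

	for (int i = 0; i < 3; i++)
		printf("linfo[%d] -> prog + %ld bytes\n", i,
		       (long)((uint8_t *)jited_linfo[i] - prog_image));
	return 0;
}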
+++ */ +++ dst = (void *)raw; +++ for (i = 0, was_ld_map = false; i < fp->len; i++) { +++ dst[i] = fp->insnsi[i]; +++ if (!was_ld_map && +++ dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) && +++ (dst[i].src_reg == BPF_PSEUDO_MAP_FD || +++ dst[i].src_reg == BPF_PSEUDO_MAP_VALUE)) { +++ was_ld_map = true; +++ dst[i].imm = 0; +++ } else if (was_ld_map && +++ dst[i].code == 0 && +++ dst[i].dst_reg == 0 && +++ dst[i].src_reg == 0 && +++ dst[i].off == 0) { +++ was_ld_map = false; +++ dst[i].imm = 0; +++ } else { +++ was_ld_map = false; +++ } +++ } +++ +++ psize = bpf_prog_insn_size(fp); +++ memset(&raw[psize], 0, raw_size - psize); +++ raw[psize++] = 0x80; +++ +++ bsize = round_up(psize, SHA_MESSAGE_BYTES); +++ blocks = bsize / SHA_MESSAGE_BYTES; +++ todo = raw; +++ if (bsize - psize >= sizeof(__be64)) { +++ bits = (__be64 *)(todo + bsize - sizeof(__be64)); +++ } else { +++ bits = (__be64 *)(todo + bsize + bits_offset); +++ blocks++; +++ } +++ *bits = cpu_to_be64((psize - 1) << 3); +++ +++ while (blocks--) { +++ sha_transform(digest, todo, ws); +++ todo += SHA_MESSAGE_BYTES; +++ } +++ +++ result = (__force __be32 *)digest; +++ for (i = 0; i < SHA_DIGEST_WORDS; i++) +++ result[i] = cpu_to_be32(digest[i]); +++ memcpy(fp->tag, result, sizeof(fp->tag)); +++ +++ vfree(raw); +++ return 0; +++} +++ +++static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old, +++ s32 end_new, s32 curr, const bool probe_pass) +++{ +++ const s64 imm_min = S32_MIN, imm_max = S32_MAX; +++ s32 delta = end_new - end_old; +++ s64 imm = insn->imm; +++ +++ if (curr < pos && curr + imm + 1 >= end_old) +++ imm += delta; +++ else if (curr >= end_new && curr + imm + 1 < end_new) +++ imm -= delta; +++ if (imm < imm_min || imm > imm_max) +++ return -ERANGE; +++ if (!probe_pass) +++ insn->imm = imm; +++ return 0; +++} +++ +++static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old, +++ s32 end_new, s32 curr, const bool probe_pass) +++{ +++ const s32 off_min = S16_MIN, off_max = S16_MAX; +++ s32 delta = end_new - end_old; +++ s32 off = insn->off; +++ +++ if (curr < pos && curr + off + 1 >= end_old) +++ off += delta; +++ else if (curr >= end_new && curr + off + 1 < end_new) +++ off -= delta; +++ if (off < off_min || off > off_max) +++ return -ERANGE; +++ if (!probe_pass) +++ insn->off = off; +++ return 0; +++} +++ +++static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old, +++ s32 end_new, const bool probe_pass) +++{ +++ u32 i, insn_cnt = prog->len + (probe_pass ? end_new - end_old : 0); +++ struct bpf_insn *insn = prog->insnsi; +++ int ret = 0; +++ +++ for (i = 0; i < insn_cnt; i++, insn++) { +++ u8 code; +++ +++ /* In the probing pass we still operate on the original, +++ * unpatched image in order to check overflows before we +++ * do any other adjustments. Therefore skip the patchlet. +++ */ +++ if (probe_pass && i == pos) { +++ i = end_new; +++ insn = prog->insnsi + end_old; +++ } +++ code = insn->code; +++ if ((BPF_CLASS(code) != BPF_JMP && +++ BPF_CLASS(code) != BPF_JMP32) || +++ BPF_OP(code) == BPF_EXIT) +++ continue; +++ /* Adjust offset of jmps if we cross patch boundaries. 
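bpf_adj_delta_to_imm()/bpf_adj_delta_to_off() above only touch jumps whose source and target lie on opposite sides of the patched region; everything else keeps its relative offset. A toy model of the off fixup with example positions:

#include <stdio.h>

/* Mirrors the off fixup above: a jump at insn `curr` with relative offset
 * `off` targets insn curr + off + 1.  When the insn at `pos` grows into
 * (end_new - pos) insns, jumps that span the patch absorb the delta.
 */
static int adj_off(int off, int pos, int end_old, int end_new, int curr)
{
	int delta = end_new - end_old;

	if (curr < pos && curr + off + 1 >= end_old)
		off += delta;
	else if (curr >= end_new && curr + off + 1 < end_new)
		off -= delta;
	return off;
}

int main(void)
{
	/* Insn 3 replaced by four insns (end_old = 4, end_new = 7).  A jump
	 * at insn 1 that used to land on insn 6 (off = 4) now needs off = 7.
	 */
	printf("adjusted off = %d\n", adj_off(4, 3, 4, 7, 1));
	return 0;
}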
*/ +++ if (BPF_OP(code) == BPF_CALL) { +++ if (insn->src_reg != BPF_PSEUDO_CALL) +++ continue; +++ ret = bpf_adj_delta_to_imm(insn, pos, end_old, +++ end_new, i, probe_pass); +++ } else { +++ ret = bpf_adj_delta_to_off(insn, pos, end_old, +++ end_new, i, probe_pass); +++ } +++ if (ret) +++ break; +++ } +++ +++ return ret; +++} +++ +++static void bpf_adj_linfo(struct bpf_prog *prog, u32 off, u32 delta) +++{ +++ struct bpf_line_info *linfo; +++ u32 i, nr_linfo; +++ +++ nr_linfo = prog->aux->nr_linfo; +++ if (!nr_linfo || !delta) +++ return; +++ +++ linfo = prog->aux->linfo; +++ +++ for (i = 0; i < nr_linfo; i++) +++ if (off < linfo[i].insn_off) +++ break; +++ +++ /* Push all off < linfo[i].insn_off by delta */ +++ for (; i < nr_linfo; i++) +++ linfo[i].insn_off += delta; +++} +++ +++struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, +++ const struct bpf_insn *patch, u32 len) +++{ +++ u32 insn_adj_cnt, insn_rest, insn_delta = len - 1; +++ const u32 cnt_max = S16_MAX; +++ struct bpf_prog *prog_adj; +++ int err; +++ +++ /* Since our patchlet doesn't expand the image, we're done. */ +++ if (insn_delta == 0) { +++ memcpy(prog->insnsi + off, patch, sizeof(*patch)); +++ return prog; +++ } +++ +++ insn_adj_cnt = prog->len + insn_delta; +++ +++ /* Reject anything that would potentially let the insn->off +++ * target overflow when we have excessive program expansions. +++ * We need to probe here before we do any reallocation where +++ * we afterwards may not fail anymore. +++ */ +++ if (insn_adj_cnt > cnt_max && +++ (err = bpf_adj_branches(prog, off, off + 1, off + len, true))) +++ return ERR_PTR(err); +++ +++ /* Several new instructions need to be inserted. Make room +++ * for them. Likely, there's no need for a new allocation as +++ * last page could have large enough tailroom. +++ */ +++ prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt), +++ GFP_USER); +++ if (!prog_adj) +++ return ERR_PTR(-ENOMEM); +++ +++ prog_adj->len = insn_adj_cnt; +++ +++ /* Patching happens in 3 steps: +++ * +++ * 1) Move over tail of insnsi from next instruction onwards, +++ * so we can patch the single target insn with one or more +++ * new ones (patching is always from 1 to n insns, n > 0). +++ * 2) Inject new instructions at the target location. +++ * 3) Adjust branch offsets if necessary. +++ */ +++ insn_rest = insn_adj_cnt - off - len; +++ +++ memmove(prog_adj->insnsi + off + len, prog_adj->insnsi + off + 1, +++ sizeof(*patch) * insn_rest); +++ memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len); +++ +++ /* We are guaranteed to not fail at this point, otherwise +++ * the ship has sailed to reverse to the original state. An +++ * overflow cannot happen at this point. 
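The first two of the three patching steps described in bpf_patch_insn_single() above reduce to one memmove() plus one memcpy() on the instruction array; step 3 is what bpf_adj_branches() takes care of. The same dance on a plain int array (sizes and values are illustrative):

#include <stdio.h>
#include <string.h>

int main(void)
{
	int insns[8] = { 10, 11, 12, 13, 14 };	/* 5 "insns", with spare room  */
	int patch[3] = { 120, 121, 122 };	/* replaces the insn at off 2  */
	int off = 2, len = 3, old_len = 5;
	int adj_len = old_len + len - 1;	/* 7 insns after patching      */
	int rest = adj_len - off - len;		/* insns that follow the patch */

	/* Step 1: move the tail out of the way.  Step 2: drop in the patch. */
	memmove(&insns[off + len], &insns[off + 1], sizeof(int) * rest);
	memcpy(&insns[off], patch, sizeof(int) * len);

	for (int i = 0; i < adj_len; i++)	/* 10 11 120 121 122 13 14 */
		printf("%d ", insns[i]);
	printf("\n");
	return 0;
}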
+++ */ +++ BUG_ON(bpf_adj_branches(prog_adj, off, off + 1, off + len, false)); +++ +++ bpf_adj_linfo(prog_adj, off, insn_delta); +++ +++ return prog_adj; +++} +++ +++int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt) +++{ +++ /* Branch offsets can't overflow when program is shrinking, no need +++ * to call bpf_adj_branches(..., true) here +++ */ +++ memmove(prog->insnsi + off, prog->insnsi + off + cnt, +++ sizeof(struct bpf_insn) * (prog->len - off - cnt)); +++ prog->len -= cnt; +++ +++ return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false)); +++} +++ +++static void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp) +++{ +++ int i; +++ +++ for (i = 0; i < fp->aux->func_cnt; i++) +++ bpf_prog_kallsyms_del(fp->aux->func[i]); +++} +++ +++void bpf_prog_kallsyms_del_all(struct bpf_prog *fp) +++{ +++ bpf_prog_kallsyms_del_subprogs(fp); +++ bpf_prog_kallsyms_del(fp); +++} ++ ++ #ifdef CONFIG_BPF_JIT +++/* All BPF JIT sysctl knobs here. */ +++int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); +++int bpf_jit_harden __read_mostly; +++int bpf_jit_kallsyms __read_mostly; +++long bpf_jit_limit __read_mostly; +++ +++static __always_inline void +++bpf_get_prog_addr_region(const struct bpf_prog *prog, +++ unsigned long *symbol_start, +++ unsigned long *symbol_end) +++{ +++ const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog); +++ unsigned long addr = (unsigned long)hdr; +++ +++ WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog)); +++ +++ *symbol_start = addr; +++ *symbol_end = addr + hdr->pages * PAGE_SIZE; +++} +++ +++void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) +++{ +++ const char *end = sym + KSYM_NAME_LEN; +++ const struct btf_type *type; +++ const char *func_name; +++ +++ BUILD_BUG_ON(sizeof("bpf_prog_") + +++ sizeof(prog->tag) * 2 + +++ /* name has been null terminated. +++ * We should need +1 for the '_' preceding +++ * the name. However, the null character +++ * is double counted between the name and the +++ * sizeof("bpf_prog_") above, so we omit +++ * the +1 here. 
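bpf_get_prog_name() above builds the kallsyms entry as "bpf_prog_" plus the 8-byte tag in hex, optionally followed by "_" and the program (or BTF function) name. A user-space sketch of that formatting, with a made-up tag and name and assuming the usual KSYM_NAME_LEN of 128:

#include <stdio.h>

int main(void)
{
	const unsigned char tag[8] = { 0xde, 0xad, 0xbe, 0xef, 0x00, 0x11, 0x22, 0x33 };
	const char *name = "xdp_drop";			/* prog->aux->name stand-in */
	char sym[128];					/* KSYM_NAME_LEN            */
	int n = snprintf(sym, sizeof(sym), "bpf_prog_");

	for (int i = 0; i < 8; i++)			/* tag rendered as lowercase hex */
		n += snprintf(sym + n, sizeof(sym) - n, "%02x", tag[i]);
	snprintf(sym + n, sizeof(sym) - n, "_%s", name);

	printf("%s\n", sym);	/* bpf_prog_deadbeef00112233_xdp_drop */
	return 0;
}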
+++ */ +++ sizeof(prog->aux->name) > KSYM_NAME_LEN); +++ +++ sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_"); +++ sym = bin2hex(sym, prog->tag, sizeof(prog->tag)); +++ +++ /* prog->aux->name will be ignored if full btf name is available */ +++ if (prog->aux->func_info_cnt) { +++ type = btf_type_by_id(prog->aux->btf, +++ prog->aux->func_info[prog->aux->func_idx].type_id); +++ func_name = btf_name_by_offset(prog->aux->btf, type->name_off); +++ snprintf(sym, (size_t)(end - sym), "_%s", func_name); +++ return; +++ } +++ +++ if (prog->aux->name[0]) +++ snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name); +++ else +++ *sym = 0; +++} +++ +++static __always_inline unsigned long +++bpf_get_prog_addr_start(struct latch_tree_node *n) +++{ +++ unsigned long symbol_start, symbol_end; +++ const struct bpf_prog_aux *aux; +++ +++ aux = container_of(n, struct bpf_prog_aux, ksym_tnode); +++ bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end); +++ +++ return symbol_start; +++} +++ +++static __always_inline bool bpf_tree_less(struct latch_tree_node *a, +++ struct latch_tree_node *b) +++{ +++ return bpf_get_prog_addr_start(a) < bpf_get_prog_addr_start(b); +++} +++ +++static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n) +++{ +++ unsigned long val = (unsigned long)key; +++ unsigned long symbol_start, symbol_end; +++ const struct bpf_prog_aux *aux; +++ +++ aux = container_of(n, struct bpf_prog_aux, ksym_tnode); +++ bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end); +++ +++ if (val < symbol_start) +++ return -1; +++ if (val >= symbol_end) +++ return 1; +++ +++ return 0; +++} +++ +++static const struct latch_tree_ops bpf_tree_ops = { +++ .less = bpf_tree_less, +++ .comp = bpf_tree_comp, +++}; +++ +++static DEFINE_SPINLOCK(bpf_lock); +++static LIST_HEAD(bpf_kallsyms); +++static struct latch_tree_root bpf_tree __cacheline_aligned; +++ +++static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux) +++{ +++ WARN_ON_ONCE(!list_empty(&aux->ksym_lnode)); +++ list_add_tail_rcu(&aux->ksym_lnode, &bpf_kallsyms); +++ latch_tree_insert(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops); +++} +++ +++static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux) +++{ +++ if (list_empty(&aux->ksym_lnode)) +++ return; +++ +++ latch_tree_erase(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops); +++ list_del_rcu(&aux->ksym_lnode); +++} +++ +++static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp) +++{ +++ return fp->jited && !bpf_prog_was_classic(fp); +++} +++ +++static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) +++{ +++ return list_empty(&fp->aux->ksym_lnode) || +++ fp->aux->ksym_lnode.prev == LIST_POISON2; +++} +++ +++void bpf_prog_kallsyms_add(struct bpf_prog *fp) +++{ +++ if (!bpf_prog_kallsyms_candidate(fp) || +++ !capable(CAP_SYS_ADMIN)) +++ return; +++ +++ spin_lock_bh(&bpf_lock); +++ bpf_prog_ksym_node_add(fp->aux); +++ spin_unlock_bh(&bpf_lock); +++} +++ +++void bpf_prog_kallsyms_del(struct bpf_prog *fp) +++{ +++ if (!bpf_prog_kallsyms_candidate(fp)) +++ return; +++ +++ spin_lock_bh(&bpf_lock); +++ bpf_prog_ksym_node_del(fp->aux); +++ spin_unlock_bh(&bpf_lock); +++} +++ +++static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr) +++{ +++ struct latch_tree_node *n; +++ +++ if (!bpf_jit_kallsyms_enabled()) +++ return NULL; +++ +++ n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops); +++ return n ? 
+++ container_of(n, struct bpf_prog_aux, ksym_tnode)->prog : +++ NULL; +++} +++ +++const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, +++ unsigned long *off, char *sym) +++{ +++ unsigned long symbol_start, symbol_end; +++ struct bpf_prog *prog; +++ char *ret = NULL; +++ +++ rcu_read_lock(); +++ prog = bpf_prog_kallsyms_find(addr); +++ if (prog) { +++ bpf_get_prog_addr_region(prog, &symbol_start, &symbol_end); +++ bpf_get_prog_name(prog, sym); +++ +++ ret = sym; +++ if (size) +++ *size = symbol_end - symbol_start; +++ if (off) +++ *off = addr - symbol_start; +++ } +++ rcu_read_unlock(); +++ +++ return ret; +++} +++ +++bool is_bpf_text_address(unsigned long addr) +++{ +++ bool ret; +++ +++ rcu_read_lock(); +++ ret = bpf_prog_kallsyms_find(addr) != NULL; +++ rcu_read_unlock(); +++ +++ return ret; +++} +++ +++int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, +++ char *sym) +++{ +++ struct bpf_prog_aux *aux; +++ unsigned int it = 0; +++ int ret = -ERANGE; +++ +++ if (!bpf_jit_kallsyms_enabled()) +++ return ret; +++ +++ rcu_read_lock(); +++ list_for_each_entry_rcu(aux, &bpf_kallsyms, ksym_lnode) { +++ if (it++ != symnum) +++ continue; +++ +++ bpf_get_prog_name(aux->prog, sym); +++ +++ *value = (unsigned long)aux->prog->bpf_func; +++ *type = BPF_SYM_ELF_TYPE; +++ +++ ret = 0; +++ break; +++ } +++ rcu_read_unlock(); +++ +++ return ret; +++} +++ +++static atomic_long_t bpf_jit_current; +++ +++/* Can be overridden by an arch's JIT compiler if it has a custom, +++ * dedicated BPF backend memory area, or if neither of the two +++ * below apply. +++ */ +++u64 __weak bpf_jit_alloc_exec_limit(void) +++{ +++#if defined(MODULES_VADDR) +++ return MODULES_END - MODULES_VADDR; +++#else +++ return VMALLOC_END - VMALLOC_START; +++#endif +++} +++ +++static int __init bpf_jit_charge_init(void) +++{ +++ /* Only used as heuristic here to derive limit. */ +++ bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2, +++ PAGE_SIZE), LONG_MAX); +++ return 0; +++} +++pure_initcall(bpf_jit_charge_init); +++ +++static int bpf_jit_charge_modmem(u32 pages) +++{ +++ if (atomic_long_add_return(pages, &bpf_jit_current) > +++ (bpf_jit_limit >> PAGE_SHIFT)) { +++ if (!capable(CAP_SYS_ADMIN)) { +++ atomic_long_sub(pages, &bpf_jit_current); +++ return -EPERM; +++ } +++ } +++ +++ return 0; +++} +++ +++static void bpf_jit_uncharge_modmem(u32 pages) +++{ +++ atomic_long_sub(pages, &bpf_jit_current); +++} +++ +++void *__weak bpf_jit_alloc_exec(unsigned long size) +++{ +++ return module_alloc(size); +++} +++ +++void __weak bpf_jit_free_exec(void *addr) +++{ +++ module_memfree(addr); +++} +++ ++ struct bpf_binary_header * ++ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, ++ unsigned int alignment, ++ bpf_jit_fill_hole_t bpf_fill_ill_insns) ++ { ++ struct bpf_binary_header *hdr; ++- unsigned int size, hole, start; +++ u32 size, hole, start, pages; ++ ++ /* Most of BPF filters are really small, but if some of them ++ * fill a page, allow at least 128 extra bytes to insert a ++ * random section of illegal instructions. ++ */ ++ size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE); ++- hdr = module_alloc(size); ++- if (hdr == NULL) +++ pages = size / PAGE_SIZE; +++ +++ if (bpf_jit_charge_modmem(pages)) +++ return NULL; +++ hdr = bpf_jit_alloc_exec(size); +++ if (!hdr) { +++ bpf_jit_uncharge_modmem(pages); ++ return NULL; +++ } ++ ++ /* Fill space with illegal/arch-dep instructions. 
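The modmem accounting above defaults bpf_jit_limit to a quarter of the JIT address range and charges whole pages; only CAP_SYS_ADMIN may overshoot the cap. A miniature model of that bookkeeping, with a pretend 128 MiB region and 4 KiB pages:

#include <stdio.h>

#define PAGE_SHIFT 12

static long jit_limit;
static long jit_current;

static int charge_modmem(unsigned int pages, int capable_sys_admin)
{
	jit_current += pages;
	if (jit_current > (jit_limit >> PAGE_SHIFT) && !capable_sys_admin) {
		jit_current -= pages;		/* over the cap and unprivileged */
		return -1;			/* stands in for -EPERM          */
	}
	return 0;
}

int main(void)
{
	unsigned long exec_space = 128UL << 20;		/* pretend 128 MiB JIT region */

	jit_limit = exec_space >> 2;			/* default cap: one quarter   */
	printf("page cap : %ld\n", jit_limit >> PAGE_SHIFT);	/* 8192            */
	printf("1st 8000 : %d\n", charge_modmem(8000, 0));	/* 0, fits         */
	printf("2nd 8000 : %d\n", charge_modmem(8000, 0));	/* -1, refused     */
	printf("as admin : %d\n", charge_modmem(8000, 1));	/* 0, may overshoot */
	return 0;
}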
*/ ++ bpf_fill_ill_insns(hdr, size); ++ ++- hdr->pages = size / PAGE_SIZE; +++ hdr->pages = pages; ++ hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), ++ PAGE_SIZE - sizeof(*hdr)); ++- start = (prandom_u32() % hole) & ~(alignment - 1); +++ start = (get_random_int() % hole) & ~(alignment - 1); ++ ++ /* Leave a random number of instructions before BPF code. */ ++ *image_ptr = &hdr->image[start]; ++@@ -171,13 +830,301 @@ bpf_jit_binary_alloc(unsigned int progle ++ ++ void bpf_jit_binary_free(struct bpf_binary_header *hdr) ++ { ++- module_memfree(hdr); +++ u32 pages = hdr->pages; +++ +++ bpf_jit_free_exec(hdr); +++ bpf_jit_uncharge_modmem(pages); +++} +++ +++/* This symbol is only overridden by archs that have different +++ * requirements than the usual eBPF JITs, f.e. when they only +++ * implement cBPF JIT, do not set images read-only, etc. +++ */ +++void __weak bpf_jit_free(struct bpf_prog *fp) +++{ +++ if (fp->jited) { +++ struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp); +++ +++ bpf_jit_binary_unlock_ro(hdr); +++ bpf_jit_binary_free(hdr); +++ +++ WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp)); +++ } +++ +++ bpf_prog_unlock_free(fp); +++} +++ +++int bpf_jit_get_func_addr(const struct bpf_prog *prog, +++ const struct bpf_insn *insn, bool extra_pass, +++ u64 *func_addr, bool *func_addr_fixed) +++{ +++ s16 off = insn->off; +++ s32 imm = insn->imm; +++ u8 *addr; +++ +++ *func_addr_fixed = insn->src_reg != BPF_PSEUDO_CALL; +++ if (!*func_addr_fixed) { +++ /* Place-holder address till the last pass has collected +++ * all addresses for JITed subprograms in which case we +++ * can pick them up from prog->aux. +++ */ +++ if (!extra_pass) +++ addr = NULL; +++ else if (prog->aux->func && +++ off >= 0 && off < prog->aux->func_cnt) +++ addr = (u8 *)prog->aux->func[off]->bpf_func; +++ else +++ return -EINVAL; +++ } else { +++ /* Address of a BPF helper call. Since part of the core +++ * kernel, it's always at a fixed location. __bpf_call_base +++ * and the helper with imm relative to it are both in core +++ * kernel. +++ */ +++ addr = (u8 *)__bpf_call_base + imm; +++ } +++ +++ *func_addr = (unsigned long)addr; +++ return 0; +++} +++ +++static int bpf_jit_blind_insn(const struct bpf_insn *from, +++ const struct bpf_insn *aux, +++ struct bpf_insn *to_buff, +++ bool emit_zext) +++{ +++ struct bpf_insn *to = to_buff; +++ u32 imm_rnd = get_random_int(); +++ s16 off; +++ +++ BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG); +++ BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG); +++ +++ /* Constraints on AX register: +++ * +++ * AX register is inaccessible from user space. It is mapped in +++ * all JITs, and used here for constant blinding rewrites. It is +++ * typically "stateless" meaning its contents are only valid within +++ * the executed instruction, but not across several instructions. +++ * There are a few exceptions however which are further detailed +++ * below. +++ * +++ * Constant blinding is only used by JITs, not in the interpreter. +++ * The interpreter uses AX in some occasions as a local temporary +++ * register e.g. in DIV or MOD instructions. +++ * +++ * In restricted circumstances, the verifier can also use the AX +++ * register for rewrites as long as they do not interfere with +++ * the above cases! 
+++ */ +++ if (from->dst_reg == BPF_REG_AX || from->src_reg == BPF_REG_AX) +++ goto out; +++ +++ if (from->imm == 0 && +++ (from->code == (BPF_ALU | BPF_MOV | BPF_K) || +++ from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) { +++ *to++ = BPF_ALU64_REG(BPF_XOR, from->dst_reg, from->dst_reg); +++ goto out; +++ } +++ +++ switch (from->code) { +++ case BPF_ALU | BPF_ADD | BPF_K: +++ case BPF_ALU | BPF_SUB | BPF_K: +++ case BPF_ALU | BPF_AND | BPF_K: +++ case BPF_ALU | BPF_OR | BPF_K: +++ case BPF_ALU | BPF_XOR | BPF_K: +++ case BPF_ALU | BPF_MUL | BPF_K: +++ case BPF_ALU | BPF_MOV | BPF_K: +++ case BPF_ALU | BPF_DIV | BPF_K: +++ case BPF_ALU | BPF_MOD | BPF_K: +++ *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); +++ *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); +++ *to++ = BPF_ALU32_REG(from->code, from->dst_reg, BPF_REG_AX); +++ break; +++ +++ case BPF_ALU64 | BPF_ADD | BPF_K: +++ case BPF_ALU64 | BPF_SUB | BPF_K: +++ case BPF_ALU64 | BPF_AND | BPF_K: +++ case BPF_ALU64 | BPF_OR | BPF_K: +++ case BPF_ALU64 | BPF_XOR | BPF_K: +++ case BPF_ALU64 | BPF_MUL | BPF_K: +++ case BPF_ALU64 | BPF_MOV | BPF_K: +++ case BPF_ALU64 | BPF_DIV | BPF_K: +++ case BPF_ALU64 | BPF_MOD | BPF_K: +++ *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); +++ *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); +++ *to++ = BPF_ALU64_REG(from->code, from->dst_reg, BPF_REG_AX); +++ break; +++ +++ case BPF_JMP | BPF_JEQ | BPF_K: +++ case BPF_JMP | BPF_JNE | BPF_K: +++ case BPF_JMP | BPF_JGT | BPF_K: +++ case BPF_JMP | BPF_JLT | BPF_K: +++ case BPF_JMP | BPF_JGE | BPF_K: +++ case BPF_JMP | BPF_JLE | BPF_K: +++ case BPF_JMP | BPF_JSGT | BPF_K: +++ case BPF_JMP | BPF_JSLT | BPF_K: +++ case BPF_JMP | BPF_JSGE | BPF_K: +++ case BPF_JMP | BPF_JSLE | BPF_K: +++ case BPF_JMP | BPF_JSET | BPF_K: +++ /* Accommodate for extra offset in case of a backjump. */ +++ off = from->off; +++ if (off < 0) +++ off -= 2; +++ *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); +++ *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); +++ *to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off); +++ break; +++ +++ case BPF_JMP32 | BPF_JEQ | BPF_K: +++ case BPF_JMP32 | BPF_JNE | BPF_K: +++ case BPF_JMP32 | BPF_JGT | BPF_K: +++ case BPF_JMP32 | BPF_JLT | BPF_K: +++ case BPF_JMP32 | BPF_JGE | BPF_K: +++ case BPF_JMP32 | BPF_JLE | BPF_K: +++ case BPF_JMP32 | BPF_JSGT | BPF_K: +++ case BPF_JMP32 | BPF_JSLT | BPF_K: +++ case BPF_JMP32 | BPF_JSGE | BPF_K: +++ case BPF_JMP32 | BPF_JSLE | BPF_K: +++ case BPF_JMP32 | BPF_JSET | BPF_K: +++ /* Accommodate for extra offset in case of a backjump. */ +++ off = from->off; +++ if (off < 0) +++ off -= 2; +++ *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); +++ *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); +++ *to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX, +++ off); +++ break; +++ +++ case BPF_LD | BPF_IMM | BPF_DW: +++ *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm); +++ *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); +++ *to++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32); +++ *to++ = BPF_ALU64_REG(BPF_MOV, aux[0].dst_reg, BPF_REG_AX); +++ break; +++ case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. 
*/ +++ *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm); +++ *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); +++ if (emit_zext) +++ *to++ = BPF_ZEXT_REG(BPF_REG_AX); +++ *to++ = BPF_ALU64_REG(BPF_OR, aux[0].dst_reg, BPF_REG_AX); +++ break; +++ +++ case BPF_ST | BPF_MEM | BPF_DW: +++ case BPF_ST | BPF_MEM | BPF_W: +++ case BPF_ST | BPF_MEM | BPF_H: +++ case BPF_ST | BPF_MEM | BPF_B: +++ *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); +++ *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); +++ *to++ = BPF_STX_MEM(from->code, from->dst_reg, BPF_REG_AX, from->off); +++ break; +++ } +++out: +++ return to - to_buff; +++} +++ +++static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other, +++ gfp_t gfp_extra_flags) +++{ +++ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; +++ struct bpf_prog *fp; +++ +++ fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL); +++ if (fp != NULL) { +++ /* aux->prog still points to the fp_other one, so +++ * when promoting the clone to the real program, +++ * this still needs to be adapted. +++ */ +++ memcpy(fp, fp_other, fp_other->pages * PAGE_SIZE); +++ } +++ +++ return fp; +++} +++ +++static void bpf_prog_clone_free(struct bpf_prog *fp) +++{ +++ /* aux was stolen by the other clone, so we cannot free +++ * it from this path! It will be freed eventually by the +++ * other program on release. +++ * +++ * At this point, we don't need a deferred release since +++ * clone is guaranteed to not be locked. +++ */ +++ fp->aux = NULL; +++ __bpf_prog_free(fp); +++} +++ +++void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other) +++{ +++ /* We have to repoint aux->prog to self, as we don't +++ * know whether fp here is the clone or the original. +++ */ +++ fp->aux->prog = fp; +++ bpf_prog_clone_free(fp_other); +++} +++ +++struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog) +++{ +++ struct bpf_insn insn_buff[16], aux[2]; +++ struct bpf_prog *clone, *tmp; +++ int insn_delta, insn_cnt; +++ struct bpf_insn *insn; +++ int i, rewritten; +++ +++ if (!bpf_jit_blinding_enabled(prog) || prog->blinded) +++ return prog; +++ +++ clone = bpf_prog_clone_create(prog, GFP_USER); +++ if (!clone) +++ return ERR_PTR(-ENOMEM); +++ +++ insn_cnt = clone->len; +++ insn = clone->insnsi; +++ +++ for (i = 0; i < insn_cnt; i++, insn++) { +++ /* We temporarily need to hold the original ld64 insn +++ * so that we can still access the first part in the +++ * second blinding run. +++ */ +++ if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW) && +++ insn[1].code == 0) +++ memcpy(aux, insn, sizeof(aux)); +++ +++ rewritten = bpf_jit_blind_insn(insn, aux, insn_buff, +++ clone->aux->verifier_zext); +++ if (!rewritten) +++ continue; +++ +++ tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten); +++ if (IS_ERR(tmp)) { +++ /* Patching may have repointed aux->prog during +++ * realloc from the original one, so we need to +++ * fix it up here on error. +++ */ +++ bpf_jit_prog_release_other(prog, clone); +++ return tmp; +++ } +++ +++ clone = tmp; +++ insn_delta = rewritten - 1; +++ +++ /* Walk new program and skip insns we just inserted. */ +++ insn = clone->insnsi + i + insn_delta; +++ insn_cnt += insn_delta; +++ i += insn_delta; +++ } +++ +++ clone->blinded = 1; +++ return clone; ++ } ++ #endif /* CONFIG_BPF_JIT */ ++ ++ /* Base function for offset calculation. 
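Constant blinding, as wired up in bpf_jit_blind_insn()/bpf_jit_blind_constants() above, replaces every immediate with imm_rnd ^ imm followed by an XOR with imm_rnd, so an attacker-chosen constant never appears verbatim in the JIT image while AX still ends up holding it. A plain C stand-in for the two-step recovery:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	uint32_t imm = 0x12345678;		/* the constant a program supplied */
	uint32_t imm_rnd = (uint32_t)rand();	/* fresh random value per rewrite  */
	uint32_t ax;

	ax = imm_rnd ^ imm;			/* like BPF_MOV AX, imm_rnd ^ imm  */
	ax ^= imm_rnd;				/* like BPF_XOR AX, imm_rnd        */

	/* The follow-up ALU/JMP insn then uses AX instead of the literal imm. */
	printf("recovered imm = 0x%08x\n", ax);	/* 0x12345678 */
	return 0;
}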
Needs to go into .text section, ++ * therefore keeping it non-static as well; will also be used by JITs ++- * anyway later on, so do not let the compiler omit it. +++ * anyway later on, so do not let the compiler omit it. This also needs +++ * to go into kallsyms for correlation from e.g. bpftool, so naming +++ * must not change. ++ */ ++ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) ++ { ++@@ -185,157 +1132,243 @@ noinline u64 __bpf_call_base(u64 r1, u64 ++ } ++ EXPORT_SYMBOL_GPL(__bpf_call_base); ++ +++/* All UAPI available opcodes. */ +++#define BPF_INSN_MAP(INSN_2, INSN_3) \ +++ /* 32 bit ALU operations. */ \ +++ /* Register based. */ \ +++ INSN_3(ALU, ADD, X), \ +++ INSN_3(ALU, SUB, X), \ +++ INSN_3(ALU, AND, X), \ +++ INSN_3(ALU, OR, X), \ +++ INSN_3(ALU, LSH, X), \ +++ INSN_3(ALU, RSH, X), \ +++ INSN_3(ALU, XOR, X), \ +++ INSN_3(ALU, MUL, X), \ +++ INSN_3(ALU, MOV, X), \ +++ INSN_3(ALU, ARSH, X), \ +++ INSN_3(ALU, DIV, X), \ +++ INSN_3(ALU, MOD, X), \ +++ INSN_2(ALU, NEG), \ +++ INSN_3(ALU, END, TO_BE), \ +++ INSN_3(ALU, END, TO_LE), \ +++ /* Immediate based. */ \ +++ INSN_3(ALU, ADD, K), \ +++ INSN_3(ALU, SUB, K), \ +++ INSN_3(ALU, AND, K), \ +++ INSN_3(ALU, OR, K), \ +++ INSN_3(ALU, LSH, K), \ +++ INSN_3(ALU, RSH, K), \ +++ INSN_3(ALU, XOR, K), \ +++ INSN_3(ALU, MUL, K), \ +++ INSN_3(ALU, MOV, K), \ +++ INSN_3(ALU, ARSH, K), \ +++ INSN_3(ALU, DIV, K), \ +++ INSN_3(ALU, MOD, K), \ +++ /* 64 bit ALU operations. */ \ +++ /* Register based. */ \ +++ INSN_3(ALU64, ADD, X), \ +++ INSN_3(ALU64, SUB, X), \ +++ INSN_3(ALU64, AND, X), \ +++ INSN_3(ALU64, OR, X), \ +++ INSN_3(ALU64, LSH, X), \ +++ INSN_3(ALU64, RSH, X), \ +++ INSN_3(ALU64, XOR, X), \ +++ INSN_3(ALU64, MUL, X), \ +++ INSN_3(ALU64, MOV, X), \ +++ INSN_3(ALU64, ARSH, X), \ +++ INSN_3(ALU64, DIV, X), \ +++ INSN_3(ALU64, MOD, X), \ +++ INSN_2(ALU64, NEG), \ +++ /* Immediate based. */ \ +++ INSN_3(ALU64, ADD, K), \ +++ INSN_3(ALU64, SUB, K), \ +++ INSN_3(ALU64, AND, K), \ +++ INSN_3(ALU64, OR, K), \ +++ INSN_3(ALU64, LSH, K), \ +++ INSN_3(ALU64, RSH, K), \ +++ INSN_3(ALU64, XOR, K), \ +++ INSN_3(ALU64, MUL, K), \ +++ INSN_3(ALU64, MOV, K), \ +++ INSN_3(ALU64, ARSH, K), \ +++ INSN_3(ALU64, DIV, K), \ +++ INSN_3(ALU64, MOD, K), \ +++ /* Call instruction. */ \ +++ INSN_2(JMP, CALL), \ +++ /* Exit instruction. */ \ +++ INSN_2(JMP, EXIT), \ +++ /* 32-bit Jump instructions. */ \ +++ /* Register based. */ \ +++ INSN_3(JMP32, JEQ, X), \ +++ INSN_3(JMP32, JNE, X), \ +++ INSN_3(JMP32, JGT, X), \ +++ INSN_3(JMP32, JLT, X), \ +++ INSN_3(JMP32, JGE, X), \ +++ INSN_3(JMP32, JLE, X), \ +++ INSN_3(JMP32, JSGT, X), \ +++ INSN_3(JMP32, JSLT, X), \ +++ INSN_3(JMP32, JSGE, X), \ +++ INSN_3(JMP32, JSLE, X), \ +++ INSN_3(JMP32, JSET, X), \ +++ /* Immediate based. */ \ +++ INSN_3(JMP32, JEQ, K), \ +++ INSN_3(JMP32, JNE, K), \ +++ INSN_3(JMP32, JGT, K), \ +++ INSN_3(JMP32, JLT, K), \ +++ INSN_3(JMP32, JGE, K), \ +++ INSN_3(JMP32, JLE, K), \ +++ INSN_3(JMP32, JSGT, K), \ +++ INSN_3(JMP32, JSLT, K), \ +++ INSN_3(JMP32, JSGE, K), \ +++ INSN_3(JMP32, JSLE, K), \ +++ INSN_3(JMP32, JSET, K), \ +++ /* Jump instructions. */ \ +++ /* Register based. */ \ +++ INSN_3(JMP, JEQ, X), \ +++ INSN_3(JMP, JNE, X), \ +++ INSN_3(JMP, JGT, X), \ +++ INSN_3(JMP, JLT, X), \ +++ INSN_3(JMP, JGE, X), \ +++ INSN_3(JMP, JLE, X), \ +++ INSN_3(JMP, JSGT, X), \ +++ INSN_3(JMP, JSLT, X), \ +++ INSN_3(JMP, JSGE, X), \ +++ INSN_3(JMP, JSLE, X), \ +++ INSN_3(JMP, JSET, X), \ +++ /* Immediate based. 
*/ \ +++ INSN_3(JMP, JEQ, K), \ +++ INSN_3(JMP, JNE, K), \ +++ INSN_3(JMP, JGT, K), \ +++ INSN_3(JMP, JLT, K), \ +++ INSN_3(JMP, JGE, K), \ +++ INSN_3(JMP, JLE, K), \ +++ INSN_3(JMP, JSGT, K), \ +++ INSN_3(JMP, JSLT, K), \ +++ INSN_3(JMP, JSGE, K), \ +++ INSN_3(JMP, JSLE, K), \ +++ INSN_3(JMP, JSET, K), \ +++ INSN_2(JMP, JA), \ +++ /* Store instructions. */ \ +++ /* Register based. */ \ +++ INSN_3(STX, MEM, B), \ +++ INSN_3(STX, MEM, H), \ +++ INSN_3(STX, MEM, W), \ +++ INSN_3(STX, MEM, DW), \ +++ INSN_3(STX, XADD, W), \ +++ INSN_3(STX, XADD, DW), \ +++ /* Immediate based. */ \ +++ INSN_3(ST, MEM, B), \ +++ INSN_3(ST, MEM, H), \ +++ INSN_3(ST, MEM, W), \ +++ INSN_3(ST, MEM, DW), \ +++ /* Load instructions. */ \ +++ /* Register based. */ \ +++ INSN_3(LDX, MEM, B), \ +++ INSN_3(LDX, MEM, H), \ +++ INSN_3(LDX, MEM, W), \ +++ INSN_3(LDX, MEM, DW), \ +++ /* Immediate based. */ \ +++ INSN_3(LD, IMM, DW) +++ +++bool bpf_opcode_in_insntable(u8 code) +++{ +++#define BPF_INSN_2_TBL(x, y) [BPF_##x | BPF_##y] = true +++#define BPF_INSN_3_TBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = true +++ static const bool public_insntable[256] = { +++ [0 ... 255] = false, +++ /* Now overwrite non-defaults ... */ +++ BPF_INSN_MAP(BPF_INSN_2_TBL, BPF_INSN_3_TBL), +++ /* UAPI exposed, but rewritten opcodes. cBPF carry-over. */ +++ [BPF_LD | BPF_ABS | BPF_B] = true, +++ [BPF_LD | BPF_ABS | BPF_H] = true, +++ [BPF_LD | BPF_ABS | BPF_W] = true, +++ [BPF_LD | BPF_IND | BPF_B] = true, +++ [BPF_LD | BPF_IND | BPF_H] = true, +++ [BPF_LD | BPF_IND | BPF_W] = true, +++ }; +++#undef BPF_INSN_3_TBL +++#undef BPF_INSN_2_TBL +++ return public_insntable[code]; +++} +++ +++#ifndef CONFIG_BPF_JIT_ALWAYS_ON ++ /** ++ * __bpf_prog_run - run eBPF program on a given context ++- * @ctx: is the data we are operating on +++ * @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers ++ * @insn: is the array of eBPF instructions +++ * @stack: is the eBPF storage stack ++ * ++ * Decode and execute eBPF instructions. ++ */ ++-static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) +++static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) ++ { ++- u64 stack[MAX_BPF_STACK / sizeof(u64)]; ++- u64 regs[MAX_BPF_REG], tmp; ++- static const void *jumptable[256] = { +++#define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y +++#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z +++ static const void * const jumptable[256] = { ++ [0 ... 255] = &&default_label, ++ /* Now overwrite non-defaults ... 
*/ ++- /* 32 bit ALU operations */ ++- [BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X, ++- [BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K, ++- [BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X, ++- [BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K, ++- [BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X, ++- [BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K, ++- [BPF_ALU | BPF_OR | BPF_X] = &&ALU_OR_X, ++- [BPF_ALU | BPF_OR | BPF_K] = &&ALU_OR_K, ++- [BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X, ++- [BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K, ++- [BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X, ++- [BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K, ++- [BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X, ++- [BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K, ++- [BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X, ++- [BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K, ++- [BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X, ++- [BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K, ++- [BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X, ++- [BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K, ++- [BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X, ++- [BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K, ++- [BPF_ALU | BPF_NEG] = &&ALU_NEG, ++- [BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE, ++- [BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE, ++- /* 64 bit ALU operations */ ++- [BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X, ++- [BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K, ++- [BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X, ++- [BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K, ++- [BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X, ++- [BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K, ++- [BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X, ++- [BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K, ++- [BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X, ++- [BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K, ++- [BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X, ++- [BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K, ++- [BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X, ++- [BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K, ++- [BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X, ++- [BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K, ++- [BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X, ++- [BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K, ++- [BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X, ++- [BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K, ++- [BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X, ++- [BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K, ++- [BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X, ++- [BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K, ++- [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG, ++- /* Call instruction */ ++- [BPF_JMP | BPF_CALL] = &&JMP_CALL, ++- [BPF_JMP | BPF_CALL | BPF_X] = &&JMP_TAIL_CALL, ++- /* Jumps */ ++- [BPF_JMP | BPF_JA] = &&JMP_JA, ++- [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X, ++- [BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K, ++- [BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X, ++- [BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K, ++- [BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X, ++- [BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K, ++- [BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X, ++- [BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K, ++- [BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X, ++- [BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K, ++- [BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X, ++- [BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K, ++- [BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X, ++- [BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K, ++- /* Program return */ ++- [BPF_JMP | BPF_EXIT] = &&JMP_EXIT, ++- /* Store instructions */ ++- [BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B, ++- [BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H, ++- [BPF_STX | 
BPF_MEM | BPF_W] = &&STX_MEM_W, ++- [BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW, ++- [BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W, ++- [BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW, ++- [BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B, ++- [BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H, ++- [BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W, ++- [BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW, ++- /* Load instructions */ ++- [BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B, ++- [BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H, ++- [BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W, ++- [BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW, ++- [BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W, ++- [BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H, ++- [BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B, ++- [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W, ++- [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H, ++- [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B, ++- [BPF_LD | BPF_IMM | BPF_DW] = &&LD_IMM_DW, +++ BPF_INSN_MAP(BPF_INSN_2_LBL, BPF_INSN_3_LBL), +++ /* Non-UAPI available opcodes. */ +++ [BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS, +++ [BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL, ++ }; +++#undef BPF_INSN_3_LBL +++#undef BPF_INSN_2_LBL ++ u32 tail_call_cnt = 0; ++- void *ptr; ++- int off; ++ ++ #define CONT ({ insn++; goto select_insn; }) ++ #define CONT_JMP ({ insn++; goto select_insn; }) ++ ++- FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; ++- ARG1 = (u64) (unsigned long) ctx; ++- ++- /* Registers used in classic BPF programs need to be reset first. */ ++- regs[BPF_REG_A] = 0; ++- regs[BPF_REG_X] = 0; ++- ++ select_insn: ++ goto *jumptable[insn->code]; ++ ++- /* ALU */ ++-#define ALU(OPCODE, OP) \ ++- ALU64_##OPCODE##_X: \ ++- DST = DST OP SRC; \ ++- CONT; \ ++- ALU_##OPCODE##_X: \ ++- DST = (u32) DST OP (u32) SRC; \ ++- CONT; \ ++- ALU64_##OPCODE##_K: \ ++- DST = DST OP IMM; \ ++- CONT; \ ++- ALU_##OPCODE##_K: \ ++- DST = (u32) DST OP (u32) IMM; \ +++ /* Explicitly mask the register-based shift amounts with 63 or 31 +++ * to avoid undefined behavior. Normally this won't affect the +++ * generated code, for example, in case of native 64 bit archs such +++ * as x86-64 or arm64, the compiler is optimizing the AND away for +++ * the interpreter. In case of JITs, each of the JIT backends compiles +++ * the BPF shift operations to machine instructions which produce +++ * implementation-defined results in such a case; the resulting +++ * contents of the register may be arbitrary, but program behaviour +++ * as a whole remains defined. In other words, in case of JIT backends, +++ * the AND must /not/ be added to the emitted LSH/RSH/ARSH translation. 
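The masking argued for in the comment above is what keeps the interpreter's C well defined: a 64-bit shift by 64 or more is undefined behaviour, whereas masking with 63 (or 31 for 32-bit ALU ops) pins down one result. A small demonstration with an out-of-range shift amount:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t dst = 0xff;
	uint64_t src = 68;				/* out-of-range shift amount */

	/* dst << src would be undefined; the interpreter instead computes: */
	uint64_t r64 = dst << (src & 63);		/* 68 & 63 == 4 -> 0xff0 */
	uint32_t r32 = (uint32_t)dst << ((uint32_t)src & 31);	/* 68 & 31 == 4  */

	printf("64-bit: 0x%llx  32-bit: 0x%x\n",
	       (unsigned long long)r64, r32);
	return 0;
}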
+++ */ +++ /* ALU (shifts) */ +++#define SHT(OPCODE, OP) \ +++ ALU64_##OPCODE##_X: \ +++ DST = DST OP (SRC & 63); \ +++ CONT; \ +++ ALU_##OPCODE##_X: \ +++ DST = (u32) DST OP ((u32) SRC & 31); \ +++ CONT; \ +++ ALU64_##OPCODE##_K: \ +++ DST = DST OP IMM; \ +++ CONT; \ +++ ALU_##OPCODE##_K: \ +++ DST = (u32) DST OP (u32) IMM; \ +++ CONT; +++ /* ALU (rest) */ +++#define ALU(OPCODE, OP) \ +++ ALU64_##OPCODE##_X: \ +++ DST = DST OP SRC; \ +++ CONT; \ +++ ALU_##OPCODE##_X: \ +++ DST = (u32) DST OP (u32) SRC; \ +++ CONT; \ +++ ALU64_##OPCODE##_K: \ +++ DST = DST OP IMM; \ +++ CONT; \ +++ ALU_##OPCODE##_K: \ +++ DST = (u32) DST OP (u32) IMM; \ ++ CONT; ++- ++ ALU(ADD, +) ++ ALU(SUB, -) ++ ALU(AND, &) ++ ALU(OR, |) ++- ALU(LSH, <<) ++- ALU(RSH, >>) ++ ALU(XOR, ^) ++ ALU(MUL, *) +++ SHT(LSH, <<) +++ SHT(RSH, >>) +++#undef SHT ++ #undef ALU ++ ALU_NEG: ++ DST = (u32) -DST; ++@@ -359,51 +1392,49 @@ select_insn: ++ DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32; ++ insn++; ++ CONT; +++ ALU_ARSH_X: +++ DST = (u64) (u32) (((s32) DST) >> (SRC & 31)); +++ CONT; +++ ALU_ARSH_K: +++ DST = (u64) (u32) (((s32) DST) >> IMM); +++ CONT; ++ ALU64_ARSH_X: ++- (*(s64 *) &DST) >>= SRC; +++ (*(s64 *) &DST) >>= (SRC & 63); ++ CONT; ++ ALU64_ARSH_K: ++ (*(s64 *) &DST) >>= IMM; ++ CONT; ++ ALU64_MOD_X: ++- if (unlikely(SRC == 0)) ++- return 0; ++- div64_u64_rem(DST, SRC, &tmp); ++- DST = tmp; +++ div64_u64_rem(DST, SRC, &AX); +++ DST = AX; ++ CONT; ++ ALU_MOD_X: ++- if (unlikely(SRC == 0)) ++- return 0; ++- tmp = (u32) DST; ++- DST = do_div(tmp, (u32) SRC); +++ AX = (u32) DST; +++ DST = do_div(AX, (u32) SRC); ++ CONT; ++ ALU64_MOD_K: ++- div64_u64_rem(DST, IMM, &tmp); ++- DST = tmp; +++ div64_u64_rem(DST, IMM, &AX); +++ DST = AX; ++ CONT; ++ ALU_MOD_K: ++- tmp = (u32) DST; ++- DST = do_div(tmp, (u32) IMM); +++ AX = (u32) DST; +++ DST = do_div(AX, (u32) IMM); ++ CONT; ++ ALU64_DIV_X: ++- if (unlikely(SRC == 0)) ++- return 0; ++ DST = div64_u64(DST, SRC); ++ CONT; ++ ALU_DIV_X: ++- if (unlikely(SRC == 0)) ++- return 0; ++- tmp = (u32) DST; ++- do_div(tmp, (u32) SRC); ++- DST = (u32) tmp; +++ AX = (u32) DST; +++ do_div(AX, (u32) SRC); +++ DST = (u32) AX; ++ CONT; ++ ALU64_DIV_K: ++ DST = div64_u64(DST, IMM); ++ CONT; ++ ALU_DIV_K: ++- tmp = (u32) DST; ++- do_div(tmp, (u32) IMM); ++- DST = (u32) tmp; +++ AX = (u32) DST; +++ do_div(AX, (u32) IMM); +++ DST = (u32) AX; ++ CONT; ++ ALU_END_TO_BE: ++ switch (IMM) { ++@@ -442,22 +1473,28 @@ select_insn: ++ BPF_R4, BPF_R5); ++ CONT; ++ +++ JMP_CALL_ARGS: +++ BPF_R0 = (__bpf_call_base_args + insn->imm)(BPF_R1, BPF_R2, +++ BPF_R3, BPF_R4, +++ BPF_R5, +++ insn + insn->off + 1); +++ CONT; +++ ++ JMP_TAIL_CALL: { ++ struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2; ++ struct bpf_array *array = container_of(map, struct bpf_array, map); ++ struct bpf_prog *prog; ++- u64 index = BPF_R3; +++ u32 index = BPF_R3; ++ ++ if (unlikely(index >= array->map.max_entries)) ++ goto out; ++- ++ if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT)) ++ goto out; ++ ++ tail_call_cnt++; ++ ++ prog = READ_ONCE(array->ptrs[index]); ++- if (unlikely(!prog)) +++ if (!prog) ++ goto out; ++ ++ /* ARG1 at this point is guaranteed to point to CTX from ++@@ -470,97 +1507,49 @@ select_insn: ++ out: ++ CONT; ++ } ++- /* JMP */ ++ JMP_JA: ++ insn += insn->off; ++ CONT; ++- JMP_JEQ_X: ++- if (DST == SRC) { ++- insn += insn->off; ++- CONT_JMP; ++- } ++- CONT; ++- JMP_JEQ_K: ++- if (DST == IMM) { ++- insn += insn->off; ++- CONT_JMP; ++- } ++- CONT; ++- JMP_JNE_X: ++- if (DST != SRC) { ++- insn 
+= insn->off; ++- CONT_JMP; ++- } ++- CONT; ++- JMP_JNE_K: ++- if (DST != IMM) { ++- insn += insn->off; ++- CONT_JMP; ++- } ++- CONT; ++- JMP_JGT_X: ++- if (DST > SRC) { ++- insn += insn->off; ++- CONT_JMP; ++- } ++- CONT; ++- JMP_JGT_K: ++- if (DST > IMM) { ++- insn += insn->off; ++- CONT_JMP; ++- } ++- CONT; ++- JMP_JGE_X: ++- if (DST >= SRC) { ++- insn += insn->off; ++- CONT_JMP; ++- } ++- CONT; ++- JMP_JGE_K: ++- if (DST >= IMM) { ++- insn += insn->off; ++- CONT_JMP; ++- } ++- CONT; ++- JMP_JSGT_X: ++- if (((s64) DST) > ((s64) SRC)) { ++- insn += insn->off; ++- CONT_JMP; ++- } ++- CONT; ++- JMP_JSGT_K: ++- if (((s64) DST) > ((s64) IMM)) { ++- insn += insn->off; ++- CONT_JMP; ++- } ++- CONT; ++- JMP_JSGE_X: ++- if (((s64) DST) >= ((s64) SRC)) { ++- insn += insn->off; ++- CONT_JMP; ++- } ++- CONT; ++- JMP_JSGE_K: ++- if (((s64) DST) >= ((s64) IMM)) { ++- insn += insn->off; ++- CONT_JMP; ++- } ++- CONT; ++- JMP_JSET_X: ++- if (DST & SRC) { ++- insn += insn->off; ++- CONT_JMP; ++- } ++- CONT; ++- JMP_JSET_K: ++- if (DST & IMM) { ++- insn += insn->off; ++- CONT_JMP; ++- } ++- CONT; ++ JMP_EXIT: ++ return BPF_R0; ++- +++ /* JMP */ +++#define COND_JMP(SIGN, OPCODE, CMP_OP) \ +++ JMP_##OPCODE##_X: \ +++ if ((SIGN##64) DST CMP_OP (SIGN##64) SRC) { \ +++ insn += insn->off; \ +++ CONT_JMP; \ +++ } \ +++ CONT; \ +++ JMP32_##OPCODE##_X: \ +++ if ((SIGN##32) DST CMP_OP (SIGN##32) SRC) { \ +++ insn += insn->off; \ +++ CONT_JMP; \ +++ } \ +++ CONT; \ +++ JMP_##OPCODE##_K: \ +++ if ((SIGN##64) DST CMP_OP (SIGN##64) IMM) { \ +++ insn += insn->off; \ +++ CONT_JMP; \ +++ } \ +++ CONT; \ +++ JMP32_##OPCODE##_K: \ +++ if ((SIGN##32) DST CMP_OP (SIGN##32) IMM) { \ +++ insn += insn->off; \ +++ CONT_JMP; \ +++ } \ +++ CONT; +++ COND_JMP(u, JEQ, ==) +++ COND_JMP(u, JNE, !=) +++ COND_JMP(u, JGT, >) +++ COND_JMP(u, JLT, <) +++ COND_JMP(u, JGE, >=) +++ COND_JMP(u, JLE, <=) +++ COND_JMP(u, JSET, &) +++ COND_JMP(s, JSGT, >) +++ COND_JMP(s, JSLT, <) +++ COND_JMP(s, JSGE, >=) +++ COND_JMP(s, JSLE, <=) +++#undef COND_JMP ++ /* STX and ST and LDX*/ ++ #define LDST(SIZEOP, SIZE) \ ++ STX_MEM_##SIZEOP: \ ++@@ -586,77 +1575,108 @@ out: ++ atomic64_add((u64) SRC, (atomic64_t *)(unsigned long) ++ (DST + insn->off)); ++ CONT; ++- LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */ ++- off = IMM; ++-load_word: ++- /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are ++- * only appearing in the programs where ctx == ++- * skb. All programs keep 'ctx' in regs[BPF_REG_CTX] ++- * == BPF_R6, bpf_convert_filter() saves it in BPF_R6, ++- * internal BPF verifier will check that BPF_R6 == ++- * ctx. ++- * ++- * BPF_ABS and BPF_IND are wrappers of function calls, ++- * so they scratch BPF_R1-BPF_R5 registers, preserve ++- * BPF_R6-BPF_R9, and store return value into BPF_R0. ++- * ++- * Implicit input: ++- * ctx == skb == BPF_R6 == CTX ++- * ++- * Explicit input: ++- * SRC == any register ++- * IMM == 32-bit immediate +++ +++ default_label: +++ /* If we ever reach this, we have a bug somewhere. Die hard here +++ * instead of just returning 0; we could be somewhere in a subprog, +++ * so execution could continue otherwise which we do /not/ want. ++ * ++- * Output: ++- * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness +++ * Note, verifier whitelists all opcodes in bpf_opcode_in_insntable(). 
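COND_JMP() above stamps out the _X/_K and JMP/JMP32 labels for each conditional jump; the only difference between the JMP and JMP32 variants is the width (and signedness cast) of the compare. Example values where the widths disagree:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t dst = 0xffffffff00000001ULL;	/* low word is 1, upper bits set */
	uint64_t src = 2;

	/* BPF_JMP | BPF_JLT compares the full 64-bit registers ... */
	printf("JLT    taken: %d\n", dst < src);			/* 0 */
	/* ... while BPF_JMP32 | BPF_JLT only compares the low 32 bits. */
	printf("JLT32  taken: %d\n", (uint32_t)dst < (uint32_t)src);	/* 1 */
	/* The signed variants cast to s64 / s32 in the same way. */
	printf("JSLT   taken: %d\n", (int64_t)dst < (int64_t)src);	/* 1, dst is negative as s64 */
	printf("JSLT32 taken: %d\n", (int32_t)(uint32_t)dst < (int32_t)src); /* 1, 1 < 2 */
	return 0;
}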
++ */ +++ pr_warn("BPF interpreter: unknown opcode %02x\n", insn->code); +++ BUG_ON(1); +++ return 0; +++} ++ ++- ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp); ++- if (likely(ptr != NULL)) { ++- BPF_R0 = get_unaligned_be32(ptr); ++- CONT; ++- } +++#define PROG_NAME(stack_size) __bpf_prog_run##stack_size +++#define DEFINE_BPF_PROG_RUN(stack_size) \ +++static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \ +++{ \ +++ u64 stack[stack_size / sizeof(u64)]; \ +++ u64 regs[MAX_BPF_EXT_REG]; \ +++\ +++ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ +++ ARG1 = (u64) (unsigned long) ctx; \ +++ return ___bpf_prog_run(regs, insn, stack); \ +++} ++ ++- return 0; ++- LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */ ++- off = IMM; ++-load_half: ++- ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp); ++- if (likely(ptr != NULL)) { ++- BPF_R0 = get_unaligned_be16(ptr); ++- CONT; ++- } +++#define PROG_NAME_ARGS(stack_size) __bpf_prog_run_args##stack_size +++#define DEFINE_BPF_PROG_RUN_ARGS(stack_size) \ +++static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \ +++ const struct bpf_insn *insn) \ +++{ \ +++ u64 stack[stack_size / sizeof(u64)]; \ +++ u64 regs[MAX_BPF_EXT_REG]; \ +++\ +++ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ +++ BPF_R1 = r1; \ +++ BPF_R2 = r2; \ +++ BPF_R3 = r3; \ +++ BPF_R4 = r4; \ +++ BPF_R5 = r5; \ +++ return ___bpf_prog_run(regs, insn, stack); \ +++} ++ ++- return 0; ++- LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */ ++- off = IMM; ++-load_byte: ++- ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp); ++- if (likely(ptr != NULL)) { ++- BPF_R0 = *(u8 *)ptr; ++- CONT; ++- } +++#define EVAL1(FN, X) FN(X) +++#define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y) +++#define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y) +++#define EVAL4(FN, X, Y...) FN(X) EVAL3(FN, Y) +++#define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y) +++#define EVAL6(FN, X, Y...) 
FN(X) EVAL5(FN, Y) +++ +++EVAL6(DEFINE_BPF_PROG_RUN, 32, 64, 96, 128, 160, 192); +++EVAL6(DEFINE_BPF_PROG_RUN, 224, 256, 288, 320, 352, 384); +++EVAL4(DEFINE_BPF_PROG_RUN, 416, 448, 480, 512); +++ +++EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 32, 64, 96, 128, 160, 192); +++EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 224, 256, 288, 320, 352, 384); +++EVAL4(DEFINE_BPF_PROG_RUN_ARGS, 416, 448, 480, 512); +++ +++#define PROG_NAME_LIST(stack_size) PROG_NAME(stack_size), +++ +++static unsigned int (*interpreters[])(const void *ctx, +++ const struct bpf_insn *insn) = { +++EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192) +++EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384) +++EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) +++}; +++#undef PROG_NAME_LIST +++#define PROG_NAME_LIST(stack_size) PROG_NAME_ARGS(stack_size), +++static u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, +++ const struct bpf_insn *insn) = { +++EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192) +++EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384) +++EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) +++}; +++#undef PROG_NAME_LIST ++ ++- return 0; ++- LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */ ++- off = IMM + SRC; ++- goto load_word; ++- LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */ ++- off = IMM + SRC; ++- goto load_half; ++- LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */ ++- off = IMM + SRC; ++- goto load_byte; +++void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth) +++{ +++ stack_depth = max_t(u32, stack_depth, 1); +++ insn->off = (s16) insn->imm; +++ insn->imm = interpreters_args[(round_up(stack_depth, 32) / 32) - 1] - +++ __bpf_call_base_args; +++ insn->code = BPF_JMP | BPF_CALL_ARGS; +++} ++ ++- default_label: ++- /* If we ever reach this, we have a bug somewhere. */ ++- WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code); ++- return 0; +++#else +++static unsigned int __bpf_prog_ret0_warn(const void *ctx, +++ const struct bpf_insn *insn) +++{ +++ /* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON +++ * is not working properly, so warn about it! +++ */ +++ WARN_ON_ONCE(1); +++ return 0; ++ } +++#endif ++ ++ bool bpf_prog_array_compatible(struct bpf_array *array, ++ const struct bpf_prog *fp) ++ { +++ if (fp->kprobe_override) +++ return false; +++ ++ if (!array->owner_prog_type) { ++ /* There's no owner yet where we could check for ++ * compatibility. ++@@ -691,18 +1711,62 @@ static int bpf_check_tail_call(const str ++ return 0; ++ } ++ +++static void bpf_prog_select_func(struct bpf_prog *fp) +++{ +++#ifndef CONFIG_BPF_JIT_ALWAYS_ON +++ u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1); +++ +++ fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1]; +++#else +++ fp->bpf_func = __bpf_prog_ret0_warn; +++#endif +++} +++ ++ /** ++ * bpf_prog_select_runtime - select exec runtime for BPF program ++ * @fp: bpf_prog populated with internal BPF program +++ * @err: pointer to error variable ++ * ++ * Try to JIT eBPF program, if JIT is not available, use interpreter. ++ * The BPF program will be executed via BPF_PROG_RUN() macro. ++ */ ++-int bpf_prog_select_runtime(struct bpf_prog *fp) +++struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) ++ { ++- fp->bpf_func = (void *) __bpf_prog_run; +++ /* In case of BPF to BPF calls, verifier did all the prep +++ * work with regards to JITing, etc. 
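DEFINE_BPF_PROG_RUN() above specialises the interpreter for stack sizes 32..512 in steps of 32, and bpf_prog_select_func() picks one by rounding the verifier's stack_depth up to the next multiple of 32. The index arithmetic, run over a few sample depths:

#include <stdio.h>

static unsigned int round_up_32(unsigned int x)
{
	return (x + 31) & ~31u;
}

int main(void)
{
	unsigned int depths[] = { 0, 1, 32, 33, 511, 512 };

	for (unsigned int i = 0; i < sizeof(depths) / sizeof(depths[0]); i++) {
		unsigned int d = depths[i] ? depths[i] : 1;	/* max_t(u32, depth, 1) */
		unsigned int idx = round_up_32(d) / 32 - 1;
		printf("stack_depth %3u -> interpreters[%2u] (%3u byte stack)\n",
		       depths[i], idx, (idx + 1) * 32);
	}
	return 0;
}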
+++ */ +++ if (fp->bpf_func) +++ goto finalize; +++ +++ bpf_prog_select_func(fp); +++ +++ /* eBPF JITs can rewrite the program in case constant +++ * blinding is active. However, in case of error during +++ * blinding, bpf_int_jit_compile() must always return a +++ * valid program, which in this case would simply not +++ * be JITed, but falls back to the interpreter. +++ */ +++ if (!bpf_prog_is_dev_bound(fp->aux)) { +++ *err = bpf_prog_alloc_jited_linfo(fp); +++ if (*err) +++ return fp; +++ +++ fp = bpf_int_jit_compile(fp); +++ if (!fp->jited) { +++ bpf_prog_free_jited_linfo(fp); +++#ifdef CONFIG_BPF_JIT_ALWAYS_ON +++ *err = -ENOTSUPP; +++ return fp; +++#endif +++ } else { +++ bpf_prog_free_unused_jited_linfo(fp); +++ } +++ } else { +++ *err = -EINVAL; +++ return fp; +++ } ++ ++- bpf_int_jit_compile(fp); +++finalize: ++ bpf_prog_lock_ro(fp); ++ ++ /* The tail call compatibility check can only be done at ++@@ -710,16 +1774,238 @@ int bpf_prog_select_runtime(struct bpf_p ++ * with JITed or non JITed program concatenations and not ++ * all eBPF JITs might immediately support all features. ++ */ ++- return bpf_check_tail_call(fp); +++ *err = bpf_check_tail_call(fp); +++ +++ return fp; ++ } ++ EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); ++ +++static unsigned int __bpf_prog_ret1(const void *ctx, +++ const struct bpf_insn *insn) +++{ +++ return 1; +++} +++ +++static struct bpf_prog_dummy { +++ struct bpf_prog prog; +++} dummy_bpf_prog = { +++ .prog = { +++ .bpf_func = __bpf_prog_ret1, +++ }, +++}; +++ +++/* to avoid allocating empty bpf_prog_array for cgroups that +++ * don't have bpf program attached use one global 'empty_prog_array' +++ * It will not be modified the caller of bpf_prog_array_alloc() +++ * (since caller requested prog_cnt == 0) +++ * that pointer should be 'freed' by bpf_prog_array_free() +++ */ +++static struct { +++ struct bpf_prog_array hdr; +++ struct bpf_prog *null_prog; +++} empty_prog_array = { +++ .null_prog = NULL, +++}; +++ +++struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags) +++{ +++ if (prog_cnt) +++ return kzalloc(sizeof(struct bpf_prog_array) + +++ sizeof(struct bpf_prog_array_item) * +++ (prog_cnt + 1), +++ flags); +++ +++ return &empty_prog_array.hdr; +++} +++ +++void bpf_prog_array_free(struct bpf_prog_array *progs) +++{ +++ if (!progs || progs == &empty_prog_array.hdr) +++ return; +++ kfree_rcu(progs, rcu); +++} +++ +++int bpf_prog_array_length(struct bpf_prog_array *array) +++{ +++ struct bpf_prog_array_item *item; +++ u32 cnt = 0; +++ +++ for (item = array->items; item->prog; item++) +++ if (item->prog != &dummy_bpf_prog.prog) +++ cnt++; +++ return cnt; +++} +++ +++bool bpf_prog_array_is_empty(struct bpf_prog_array *array) +++{ +++ struct bpf_prog_array_item *item; +++ +++ for (item = array->items; item->prog; item++) +++ if (item->prog != &dummy_bpf_prog.prog) +++ return false; +++ return true; +++} +++ +++static bool bpf_prog_array_copy_core(struct bpf_prog_array *array, +++ u32 *prog_ids, +++ u32 request_cnt) +++{ +++ struct bpf_prog_array_item *item; +++ int i = 0; +++ +++ for (item = array->items; item->prog; item++) { +++ if (item->prog == &dummy_bpf_prog.prog) +++ continue; +++ prog_ids[i] = item->prog->aux->id; +++ if (++i == request_cnt) { +++ item++; +++ break; +++ } +++ } +++ +++ return !!(item->prog); +++} +++ +++int bpf_prog_array_copy_to_user(struct bpf_prog_array *array, +++ __u32 __user *prog_ids, u32 cnt) +++{ +++ unsigned long err = 0; +++ bool nospc; +++ u32 *ids; +++ +++ /* users of this function are doing: +++ * cnt = 
bpf_prog_array_length(); +++ * if (cnt > 0) +++ * bpf_prog_array_copy_to_user(..., cnt); +++ * so below kcalloc doesn't need extra cnt > 0 check. +++ */ +++ ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN); +++ if (!ids) +++ return -ENOMEM; +++ nospc = bpf_prog_array_copy_core(array, ids, cnt); +++ err = copy_to_user(prog_ids, ids, cnt * sizeof(u32)); +++ kfree(ids); +++ if (err) +++ return -EFAULT; +++ if (nospc) +++ return -ENOSPC; +++ return 0; +++} +++ +++void bpf_prog_array_delete_safe(struct bpf_prog_array *array, +++ struct bpf_prog *old_prog) +++{ +++ struct bpf_prog_array_item *item; +++ +++ for (item = array->items; item->prog; item++) +++ if (item->prog == old_prog) { +++ WRITE_ONCE(item->prog, &dummy_bpf_prog.prog); +++ break; +++ } +++} +++ +++int bpf_prog_array_copy(struct bpf_prog_array *old_array, +++ struct bpf_prog *exclude_prog, +++ struct bpf_prog *include_prog, +++ struct bpf_prog_array **new_array) +++{ +++ int new_prog_cnt, carry_prog_cnt = 0; +++ struct bpf_prog_array_item *existing; +++ struct bpf_prog_array *array; +++ bool found_exclude = false; +++ int new_prog_idx = 0; +++ +++ /* Figure out how many existing progs we need to carry over to +++ * the new array. +++ */ +++ if (old_array) { +++ existing = old_array->items; +++ for (; existing->prog; existing++) { +++ if (existing->prog == exclude_prog) { +++ found_exclude = true; +++ continue; +++ } +++ if (existing->prog != &dummy_bpf_prog.prog) +++ carry_prog_cnt++; +++ if (existing->prog == include_prog) +++ return -EEXIST; +++ } +++ } +++ +++ if (exclude_prog && !found_exclude) +++ return -ENOENT; +++ +++ /* How many progs (not NULL) will be in the new array? */ +++ new_prog_cnt = carry_prog_cnt; +++ if (include_prog) +++ new_prog_cnt += 1; +++ +++ /* Do we have any prog (not NULL) in the new array? */ +++ if (!new_prog_cnt) { +++ *new_array = NULL; +++ return 0; +++ } +++ +++ /* +1 as the end of prog_array is marked with NULL */ +++ array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL); +++ if (!array) +++ return -ENOMEM; +++ +++ /* Fill in the new prog array */ +++ if (carry_prog_cnt) { +++ existing = old_array->items; +++ for (; existing->prog; existing++) +++ if (existing->prog != exclude_prog && +++ existing->prog != &dummy_bpf_prog.prog) { +++ array->items[new_prog_idx++].prog = +++ existing->prog; +++ } +++ } +++ if (include_prog) +++ array->items[new_prog_idx++].prog = include_prog; +++ array->items[new_prog_idx].prog = NULL; +++ *new_array = array; +++ return 0; +++} +++ +++int bpf_prog_array_copy_info(struct bpf_prog_array *array, +++ u32 *prog_ids, u32 request_cnt, +++ u32 *prog_cnt) +++{ +++ u32 cnt = 0; +++ +++ if (array) +++ cnt = bpf_prog_array_length(array); +++ +++ *prog_cnt = cnt; +++ +++ /* return early if user requested only program count or nothing to copy */ +++ if (!request_cnt || !cnt) +++ return 0; +++ +++ /* this function is called under trace/bpf_trace.c: bpf_event_mutex */ +++ return bpf_prog_array_copy_core(array, prog_ids, request_cnt) ? 
-ENOSPC +++ : 0; +++} +++ ++ static void bpf_prog_free_deferred(struct work_struct *work) ++ { ++ struct bpf_prog_aux *aux; +++ int i; ++ ++ aux = container_of(work, struct bpf_prog_aux, work); ++- bpf_jit_free(aux->prog); +++#ifdef CONFIG_PERF_EVENTS +++ if (aux->prog->has_callchain_buf) +++ put_callchain_buffers(); +++#endif +++ for (i = 0; i < aux->func_cnt; i++) +++ bpf_jit_free(aux->func[i]); +++ if (aux->func_cnt) { +++ kfree(aux->func); +++ bpf_prog_unlock_free(aux->prog); +++ } else { +++ bpf_jit_free(aux->prog); +++ } ++ } ++ ++ /* Free internal BPF program */ ++@@ -740,7 +2026,7 @@ void bpf_user_rnd_init_once(void) ++ prandom_init_once(&bpf_user_rnd_state); ++ } ++ ++-u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +++BPF_CALL_0(bpf_user_rnd_u32) ++ { ++ /* Should someone ever have the rather unwise idea to use some ++ * of the registers passed into this function, then note that ++@@ -753,7 +2039,7 @@ u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 ++ ++ state = &get_cpu_var(bpf_user_rnd_state); ++ res = prandom_u32_state(state); ++- put_cpu_var(state); +++ put_cpu_var(bpf_user_rnd_state); ++ ++ return res; ++ } ++@@ -762,18 +2048,36 @@ u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 ++ const struct bpf_func_proto bpf_map_lookup_elem_proto __weak; ++ const struct bpf_func_proto bpf_map_update_elem_proto __weak; ++ const struct bpf_func_proto bpf_map_delete_elem_proto __weak; +++const struct bpf_func_proto bpf_map_push_elem_proto __weak; +++const struct bpf_func_proto bpf_map_pop_elem_proto __weak; +++const struct bpf_func_proto bpf_map_peek_elem_proto __weak; +++const struct bpf_func_proto bpf_spin_lock_proto __weak; +++const struct bpf_func_proto bpf_spin_unlock_proto __weak; ++ ++ const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; ++ const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; +++const struct bpf_func_proto bpf_get_numa_node_id_proto __weak; ++ const struct bpf_func_proto bpf_ktime_get_ns_proto __weak; +++ ++ const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak; ++ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; ++ const struct bpf_func_proto bpf_get_current_comm_proto __weak; +++const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak; +++const struct bpf_func_proto bpf_get_local_storage_proto __weak; +++ ++ const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) ++ { ++ return NULL; ++ } ++ +++u64 __weak +++bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, +++ void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy) +++{ +++ return -ENOTSUPP; +++} +++EXPORT_SYMBOL_GPL(bpf_event_output); +++ ++ /* Always built-in helper functions. */ ++ const struct bpf_func_proto bpf_tail_call_proto = { ++ .func = NULL, ++@@ -784,9 +2088,34 @@ const struct bpf_func_proto bpf_tail_cal ++ .arg3_type = ARG_ANYTHING, ++ }; ++ ++-/* For classic BPF JITs that don't implement bpf_int_jit_compile(). */ ++-void __weak bpf_int_jit_compile(struct bpf_prog *prog) +++/* Stub for JITs that only support cBPF. eBPF programs are interpreted. +++ * It is encouraged to implement bpf_int_jit_compile() instead, so that +++ * eBPF and implicitly also cBPF can get JITed! +++ */ +++struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog) ++ { +++ return prog; +++} +++ +++/* Stub for JITs that support eBPF. All cBPF code gets transformed into +++ * eBPF by the kernel and is later compiled by bpf_int_jit_compile(). 
+++ */ +++void __weak bpf_jit_compile(struct bpf_prog *prog) +++{ +++} +++ +++bool __weak bpf_helper_changes_pkt_data(void *func) +++{ +++ return false; +++} +++ +++/* Return TRUE if the JIT backend wants verifier to enable sub-register usage +++ * analysis code and wants explicit zero extension inserted by verifier. +++ * Otherwise, return FALSE. +++ */ +++bool __weak bpf_jit_needs_zext(void) +++{ +++ return false; ++ } ++ ++ /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call ++@@ -797,3 +2126,13 @@ int __weak skb_copy_bits(const struct sk ++ { ++ return -EFAULT; ++ } +++ +++DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key); +++EXPORT_SYMBOL(bpf_stats_enabled_key); +++ +++/* All definitions of tracepoints related to BPF. */ +++#define CREATE_TRACE_POINTS +++#include +++ +++EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception); +++EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_bulk_tx); ++--- /dev/null +++++ b/kernel/bpf/devmap.c ++@@ -0,0 +1,698 @@ +++// SPDX-License-Identifier: GPL-2.0-only +++/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io +++ */ +++ +++/* Devmaps primary use is as a backend map for XDP BPF helper call +++ * bpf_redirect_map(). Because XDP is mostly concerned with performance we +++ * spent some effort to ensure the datapath with redirect maps does not use +++ * any locking. This is a quick note on the details. +++ * +++ * We have three possible paths to get into the devmap control plane bpf +++ * syscalls, bpf programs, and driver side xmit/flush operations. A bpf syscall +++ * will invoke an update, delete, or lookup operation. To ensure updates and +++ * deletes appear atomic from the datapath side xchg() is used to modify the +++ * netdev_map array. Then because the datapath does a lookup into the netdev_map +++ * array (read-only) from an RCU critical section we use call_rcu() to wait for +++ * an rcu grace period before free'ing the old data structures. This ensures the +++ * datapath always has a valid copy. However, the datapath does a "flush" +++ * operation that pushes any pending packets in the driver outside the RCU +++ * critical section. Each bpf_dtab_netdev tracks these pending operations using +++ * a per-cpu flush list. The bpf_dtab_netdev object will not be destroyed until +++ * this list is empty, indicating outstanding flush operations have completed. +++ * +++ * BPF syscalls may race with BPF program calls on any of the update, delete +++ * or lookup operations. As noted above the xchg() operation also keep the +++ * netdev_map consistent in this case. From the devmap side BPF programs +++ * calling into these operations are the same as multiple user space threads +++ * making system calls. +++ * +++ * Finally, any of the above may race with a netdev_unregister notifier. The +++ * unregister notifier must search for net devices in the map structure that +++ * contain a reference to the net device and remove them. This is a two step +++ * process (a) dereference the bpf_dtab_netdev object in netdev_map and (b) +++ * check to see if the ifindex is the same as the net_device being removed. +++ * When removing the dev a cmpxchg() is used to ensure the correct dev is +++ * removed, in the case of a concurrent update or delete operation it is +++ * possible that the initially referenced dev is no longer in the map. As the +++ * notifier hook walks the map we know that new dev references can not be +++ * added by the user because core infrastructure ensures dev_get_by_index() +++ * calls will fail at this point. 
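To make the data path described above concrete, a DEVMAP is normally driven from an XDP program through the bpf_redirect_map() helper named at the top of this comment. The sketch below is illustrative only and not part of this patch; the map and function names are made up, the BTF-style map definition assumes libbpf's bpf_helpers.h, and slot 0 is expected to have been filled with an ifindex by user space.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* One slot per egress device; user space stores ifindexes in the values. */
struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(max_entries, 8);
	__type(key, __u32);   /* array index */
	__type(value, __u32); /* ifindex */
} tx_ports SEC(".maps");

SEC("xdp")
int xdp_redirect_slot0(struct xdp_md *ctx)
{
	/* On success the helper returns XDP_REDIRECT and the frame is handed
	 * to the map's redirect path (note that dev_map_enqueue() below is
	 * stubbed to -EOPNOTSUPP in this backport).
	 */
	return bpf_redirect_map(&tx_ports, 0, 0);
}

char _license[] SEC("license") = "GPL";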
+++ * +++ * The devmap_hash type is a map type which interprets keys as ifindexes and +++ * indexes these using a hashmap. This allows maps that use ifindex as key to be +++ * densely packed instead of having holes in the lookup array for unused +++ * ifindexes. The setup and packet enqueue/send code is shared between the two +++ * types of devmap; only the lookup and insertion is different. +++ */ +++#include +++#include +++#include +++#include +++ +++#define DEV_CREATE_FLAG_MASK \ +++ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) +++ +++#define DEV_MAP_BULK_SIZE 16 +++struct bpf_dtab_netdev; +++ +++struct xdp_bulk_queue { +++ struct xdp_frame *q[DEV_MAP_BULK_SIZE]; +++ struct list_head flush_node; +++ struct net_device *dev_rx; +++ struct bpf_dtab_netdev *obj; +++ unsigned int count; +++}; +++ +++struct bpf_dtab_netdev { +++ struct net_device *dev; /* must be first member, due to tracepoint */ +++ struct hlist_node index_hlist; +++ struct bpf_dtab *dtab; +++ struct xdp_bulk_queue __percpu *bulkq; +++ struct rcu_head rcu; +++ unsigned int idx; /* keep track of map index for tracepoint */ +++}; +++ +++struct bpf_dtab { +++ struct bpf_map map; +++ struct bpf_dtab_netdev **netdev_map; /* DEVMAP type only */ +++ struct list_head __percpu *flush_list; +++ struct list_head list; +++ +++ /* these are only used for DEVMAP_HASH type maps */ +++ struct hlist_head *dev_index_head; +++ spinlock_t index_lock; +++ unsigned int items; +++ u32 n_buckets; +++}; +++ +++static DEFINE_SPINLOCK(dev_map_lock); +++static LIST_HEAD(dev_map_list); +++ +++static struct hlist_head *dev_map_create_hash(unsigned int entries, +++ int numa_node) +++{ +++ int i; +++ struct hlist_head *hash; +++ +++ hash = bpf_map_area_alloc(entries * sizeof(*hash), numa_node); +++ if (hash != NULL) +++ for (i = 0; i < entries; i++) +++ INIT_HLIST_HEAD(&hash[i]); +++ +++ return hash; +++} +++ +++static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab, +++ int idx) +++{ +++ return &dtab->dev_index_head[idx & (dtab->n_buckets - 1)]; +++} +++ +++static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) +++{ +++ int err, cpu; +++ u64 cost; +++ +++ /* check sanity of attributes */ +++ if (attr->max_entries == 0 || attr->key_size != 4 || +++ attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK) +++ return -EINVAL; +++ +++ /* Lookup returns a pointer straight to dev->ifindex, so make sure the +++ * verifier prevents writes from the BPF side +++ */ +++ attr->map_flags |= BPF_F_RDONLY_PROG; +++ +++ +++ bpf_map_init_from_attr(&dtab->map, attr); +++ +++ /* make sure page count doesn't overflow */ +++ cost = (u64) sizeof(struct list_head) * num_possible_cpus(); +++ +++ if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { +++ dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries); +++ +++ if (!dtab->n_buckets) /* Overflow check */ +++ return -EINVAL; +++ cost += (u64) sizeof(struct hlist_head) * dtab->n_buckets; +++ } else { +++ cost += (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *); +++ } +++ +++ /* if map size is larger than memlock limit, reject it */ +++ err = bpf_map_charge_init(&dtab->map.memory, cost); +++ if (err) +++ return -EINVAL; +++ +++ dtab->flush_list = alloc_percpu(struct list_head); +++ if (!dtab->flush_list) +++ goto free_charge; +++ +++ for_each_possible_cpu(cpu) +++ INIT_LIST_HEAD(per_cpu_ptr(dtab->flush_list, cpu)); +++ +++ if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { +++ dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets, +++ dtab->map.numa_node); +++ 
if (!dtab->dev_index_head) +++ goto free_percpu; +++ +++ spin_lock_init(&dtab->index_lock); +++ } else { +++ dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries * +++ sizeof(struct bpf_dtab_netdev *), +++ dtab->map.numa_node); +++ if (!dtab->netdev_map) +++ goto free_percpu; +++ } +++ +++ return 0; +++ +++free_percpu: +++ free_percpu(dtab->flush_list); +++free_charge: +++ bpf_map_charge_finish(&dtab->map.memory); +++ return -ENOMEM; +++} +++ +++static struct bpf_map *dev_map_alloc(union bpf_attr *attr) +++{ +++ struct bpf_dtab *dtab; +++ int err; +++ +++ if (!capable(CAP_NET_ADMIN)) +++ return ERR_PTR(-EPERM); +++ +++ dtab = kzalloc(sizeof(*dtab), GFP_USER); +++ if (!dtab) +++ return ERR_PTR(-ENOMEM); +++ +++ err = dev_map_init_map(dtab, attr); +++ if (err) { +++ kfree(dtab); +++ return ERR_PTR(err); +++ } +++ +++ spin_lock(&dev_map_lock); +++ list_add_tail_rcu(&dtab->list, &dev_map_list); +++ spin_unlock(&dev_map_lock); +++ +++ return &dtab->map; +++} +++ +++static void dev_map_free(struct bpf_map *map) +++{ +++ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); +++ int i, cpu; +++ +++ /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, +++ * so the programs (can be more than one that used this map) were +++ * disconnected from events. Wait for outstanding critical sections in +++ * these programs to complete. The rcu critical section only guarantees +++ * no further reads against netdev_map. It does __not__ ensure pending +++ * flush operations (if any) are complete. +++ */ +++ +++ spin_lock(&dev_map_lock); +++ list_del_rcu(&dtab->list); +++ spin_unlock(&dev_map_lock); +++ +++ bpf_clear_redirect_map(map); +++ synchronize_rcu(); +++ +++ /* Make sure prior __dev_map_entry_free() have completed. */ +++ rcu_barrier(); +++ +++ /* To ensure all pending flush operations have completed wait for flush +++ * list to empty on _all_ cpus. +++ * Because the above synchronize_rcu() ensures the map is disconnected +++ * from the program we can assume no new items will be added. +++ */ +++ for_each_online_cpu(cpu) { +++ struct list_head *flush_list = per_cpu_ptr(dtab->flush_list, cpu); +++ +++ while (!list_empty(flush_list)) +++ cond_resched(); +++ } +++ +++ if (dtab->map.map_type == BPF_MAP_TYPE_DEVMAP_HASH) { +++ for (i = 0; i < dtab->n_buckets; i++) { +++ struct bpf_dtab_netdev *dev; +++ struct hlist_head *head; +++ struct hlist_node *next; +++ +++ head = dev_map_index_hash(dtab, i); +++ +++ hlist_for_each_entry_safe(dev, next, head, index_hlist) { +++ hlist_del_rcu(&dev->index_hlist); +++ free_percpu(dev->bulkq); +++ dev_put(dev->dev); +++ kfree(dev); +++ } +++ } +++ +++ bpf_map_area_free(dtab->dev_index_head); +++ } else { +++ for (i = 0; i < dtab->map.max_entries; i++) { +++ struct bpf_dtab_netdev *dev; +++ +++ dev = dtab->netdev_map[i]; +++ if (!dev) +++ continue; +++ +++ free_percpu(dev->bulkq); +++ dev_put(dev->dev); +++ kfree(dev); +++ } +++ +++ bpf_map_area_free(dtab->netdev_map); +++ } +++ +++ free_percpu(dtab->flush_list); +++ kfree(dtab); +++} +++ +++static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key) +++{ +++ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); +++ u32 index = key ? 
*(u32 *)key : U32_MAX; +++ u32 *next = next_key; +++ +++ if (index >= dtab->map.max_entries) { +++ *next = 0; +++ return 0; +++ } +++ +++ if (index == dtab->map.max_entries - 1) +++ return -ENOENT; +++ *next = index + 1; +++ return 0; +++} +++ +++struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key) +++{ +++ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); +++ struct hlist_head *head = dev_map_index_hash(dtab, key); +++ struct bpf_dtab_netdev *dev; +++ +++ hlist_for_each_entry_rcu(dev, head, index_hlist) +++ if (dev->idx == key) +++ return dev; +++ +++ return NULL; +++} +++ +++static int dev_map_hash_get_next_key(struct bpf_map *map, void *key, +++ void *next_key) +++{ +++ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); +++ u32 idx, *next = next_key; +++ struct bpf_dtab_netdev *dev, *next_dev; +++ struct hlist_head *head; +++ int i = 0; +++ +++ if (!key) +++ goto find_first; +++ +++ idx = *(u32 *)key; +++ +++ dev = __dev_map_hash_lookup_elem(map, idx); +++ if (!dev) +++ goto find_first; +++ +++ next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&dev->index_hlist)), +++ struct bpf_dtab_netdev, index_hlist); +++ +++ if (next_dev) { +++ *next = next_dev->idx; +++ return 0; +++ } +++ +++ i = idx & (dtab->n_buckets - 1); +++ i++; +++ +++ find_first: +++ for (; i < dtab->n_buckets; i++) { +++ head = dev_map_index_hash(dtab, i); +++ +++ next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)), +++ struct bpf_dtab_netdev, +++ index_hlist); +++ if (next_dev) { +++ *next = next_dev->idx; +++ return 0; +++ } +++ } +++ +++ return -ENOENT; +++} +++ +++/* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled +++ * from the driver before returning from its napi->poll() routine. The poll() +++ * routine is called either from busy_poll context or net_rx_action signaled +++ * from NET_RX_SOFTIRQ. Either way the poll routine must complete before the +++ * net device can be torn down. On devmap tear down we ensure the flush list +++ * is empty before completing to ensure all flush operations have completed. +++ */ +++void __dev_map_flush(struct bpf_map *map) +++{ +++} +++ +++/* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or +++ * update happens in parallel here a dev_put wont happen until after reading the +++ * ifindex. +++ */ +++struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key) +++{ +++ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); +++ struct bpf_dtab_netdev *obj; +++ +++ if (key >= map->max_entries) +++ return NULL; +++ +++ obj = READ_ONCE(dtab->netdev_map[key]); +++ return obj; +++} +++ +++int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, +++ struct net_device *dev_rx) +++{ +++ return -EOPNOTSUPP; +++} +++ +++int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, +++ struct bpf_prog *xdp_prog) +++{ +++ return -EOPNOTSUPP; +++} +++ +++static void *dev_map_lookup_elem(struct bpf_map *map, void *key) +++{ +++ struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key); +++ struct net_device *dev = obj ? obj->dev : NULL; +++ +++ return dev ? &dev->ifindex : NULL; +++} +++ +++static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key) +++{ +++ struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map, +++ *(u32 *)key); +++ struct net_device *dev = obj ? obj->dev : NULL; +++ +++ return dev ? 
&dev->ifindex : NULL; +++} +++ +++static void __dev_map_entry_free(struct rcu_head *rcu) +++{ +++ struct bpf_dtab_netdev *dev; +++ +++ dev = container_of(rcu, struct bpf_dtab_netdev, rcu); +++ free_percpu(dev->bulkq); +++ dev_put(dev->dev); +++ kfree(dev); +++} +++ +++static int dev_map_delete_elem(struct bpf_map *map, void *key) +++{ +++ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); +++ struct bpf_dtab_netdev *old_dev; +++ int k = *(u32 *)key; +++ +++ if (k >= map->max_entries) +++ return -EINVAL; +++ +++ /* Use call_rcu() here to ensure any rcu critical sections have +++ * completed, but this does not guarantee a flush has happened +++ * yet. Because driver side rcu_read_lock/unlock only protects the +++ * running XDP program. However, for pending flush operations the +++ * dev and ctx are stored in another per cpu map. And additionally, +++ * the driver tear down ensures all soft irqs are complete before +++ * removing the net device in the case of dev_put equals zero. +++ */ +++ old_dev = xchg(&dtab->netdev_map[k], NULL); +++ if (old_dev) +++ call_rcu(&old_dev->rcu, __dev_map_entry_free); +++ return 0; +++} +++ +++static int dev_map_hash_delete_elem(struct bpf_map *map, void *key) +++{ +++ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); +++ struct bpf_dtab_netdev *old_dev; +++ int k = *(u32 *)key; +++ unsigned long flags; +++ int ret = -ENOENT; +++ +++ spin_lock_irqsave(&dtab->index_lock, flags); +++ +++ old_dev = __dev_map_hash_lookup_elem(map, k); +++ if (old_dev) { +++ dtab->items--; +++ hlist_del_init_rcu(&old_dev->index_hlist); +++ call_rcu(&old_dev->rcu, __dev_map_entry_free); +++ ret = 0; +++ } +++ spin_unlock_irqrestore(&dtab->index_lock, flags); +++ +++ return ret; +++} +++ +++static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net, +++ struct bpf_dtab *dtab, +++ u32 ifindex, +++ unsigned int idx) +++{ +++ gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; +++ struct bpf_dtab_netdev *dev; +++ struct xdp_bulk_queue *bq; +++ int cpu; +++ +++ dev = kmalloc_node(sizeof(*dev), gfp, dtab->map.numa_node); +++ if (!dev) +++ return ERR_PTR(-ENOMEM); +++ +++ dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq), +++ sizeof(void *), gfp); +++ if (!dev->bulkq) { +++ kfree(dev); +++ return ERR_PTR(-ENOMEM); +++ } +++ +++ for_each_possible_cpu(cpu) { +++ bq = per_cpu_ptr(dev->bulkq, cpu); +++ bq->obj = dev; +++ } +++ +++ dev->dev = dev_get_by_index(net, ifindex); +++ if (!dev->dev) { +++ free_percpu(dev->bulkq); +++ kfree(dev); +++ return ERR_PTR(-EINVAL); +++ } +++ +++ dev->idx = idx; +++ dev->dtab = dtab; +++ +++ return dev; +++} +++ +++static int __dev_map_update_elem(struct net *net, struct bpf_map *map, +++ void *key, void *value, u64 map_flags) +++{ +++ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); +++ struct bpf_dtab_netdev *dev, *old_dev; +++ u32 ifindex = *(u32 *)value; +++ u32 i = *(u32 *)key; +++ +++ if (unlikely(map_flags > BPF_EXIST)) +++ return -EINVAL; +++ if (unlikely(i >= dtab->map.max_entries)) +++ return -E2BIG; +++ if (unlikely(map_flags == BPF_NOEXIST)) +++ return -EEXIST; +++ +++ if (!ifindex) { +++ dev = NULL; +++ } else { +++ dev = __dev_map_alloc_node(net, dtab, ifindex, i); +++ if (IS_ERR(dev)) +++ return PTR_ERR(dev); +++ } +++ +++ /* Use call_rcu() here to ensure rcu critical sections have completed +++ * Remembering the driver side flush operation will happen before the +++ * net device is removed. 
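From the control-plane side, a matching user-space sequence might look like the sketch below. It is illustrative only and built on the raw bpf(2) syscall so that no particular libbpf version is assumed. It follows the constraints visible above: dev_map_init_map() insists on 4-byte keys and values, dev_map_alloc() requires CAP_NET_ADMIN, a BPF_NOEXIST update is rejected with -EEXIST, and writing an ifindex of 0 clears a slot.

#include <linux/bpf.h>
#include <net/if.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static long sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr)
{
	return syscall(__NR_bpf, cmd, attr, sizeof(*attr));
}

/* Create a small DEVMAP and point slot 0 at the given interface. */
static int devmap_set_slot0(const char *ifname)
{
	__u32 key = 0, ifindex = if_nametoindex(ifname);
	union bpf_attr attr;
	int map_fd;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_DEVMAP;
	attr.key_size    = 4;		/* array index */
	attr.value_size  = 4;		/* ifindex */
	attr.max_entries = 8;
	map_fd = sys_bpf(BPF_MAP_CREATE, &attr);
	if (map_fd < 0)
		return map_fd;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key    = (__u64)(unsigned long)&key;
	attr.value  = (__u64)(unsigned long)&ifindex;
	attr.flags  = BPF_ANY;		/* BPF_NOEXIST would return -EEXIST */
	return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr);
}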
+++ */ +++ old_dev = xchg(&dtab->netdev_map[i], dev); +++ if (old_dev) +++ call_rcu(&old_dev->rcu, __dev_map_entry_free); +++ +++ return 0; +++} +++ +++static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, +++ u64 map_flags) +++{ +++ return __dev_map_update_elem(current->nsproxy->net_ns, +++ map, key, value, map_flags); +++} +++ +++static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map, +++ void *key, void *value, u64 map_flags) +++{ +++ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); +++ struct bpf_dtab_netdev *dev, *old_dev; +++ u32 ifindex = *(u32 *)value; +++ u32 idx = *(u32 *)key; +++ unsigned long flags; +++ int err = -EEXIST; +++ +++ if (unlikely(map_flags > BPF_EXIST || !ifindex)) +++ return -EINVAL; +++ +++ spin_lock_irqsave(&dtab->index_lock, flags); +++ +++ old_dev = __dev_map_hash_lookup_elem(map, idx); +++ if (old_dev && (map_flags & BPF_NOEXIST)) +++ goto out_err; +++ +++ dev = __dev_map_alloc_node(net, dtab, ifindex, idx); +++ if (IS_ERR(dev)) { +++ err = PTR_ERR(dev); +++ goto out_err; +++ } +++ +++ if (old_dev) { +++ hlist_del_rcu(&old_dev->index_hlist); +++ } else { +++ if (dtab->items >= dtab->map.max_entries) { +++ spin_unlock_irqrestore(&dtab->index_lock, flags); +++ call_rcu(&dev->rcu, __dev_map_entry_free); +++ return -E2BIG; +++ } +++ dtab->items++; +++ } +++ +++ hlist_add_head_rcu(&dev->index_hlist, +++ dev_map_index_hash(dtab, idx)); +++ spin_unlock_irqrestore(&dtab->index_lock, flags); +++ +++ if (old_dev) +++ call_rcu(&old_dev->rcu, __dev_map_entry_free); +++ +++ return 0; +++ +++out_err: +++ spin_unlock_irqrestore(&dtab->index_lock, flags); +++ return err; +++} +++ +++static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value, +++ u64 map_flags) +++{ +++ return __dev_map_hash_update_elem(current->nsproxy->net_ns, +++ map, key, value, map_flags); +++} +++ +++const struct bpf_map_ops dev_map_ops = { +++ .map_alloc = dev_map_alloc, +++ .map_free = dev_map_free, +++ .map_get_next_key = dev_map_get_next_key, +++ .map_lookup_elem = dev_map_lookup_elem, +++ .map_update_elem = dev_map_update_elem, +++ .map_delete_elem = dev_map_delete_elem, +++ .map_check_btf = map_check_no_btf, +++}; +++ +++const struct bpf_map_ops dev_map_hash_ops = { +++ .map_alloc = dev_map_alloc, +++ .map_free = dev_map_free, +++ .map_get_next_key = dev_map_hash_get_next_key, +++ .map_lookup_elem = dev_map_hash_lookup_elem, +++ .map_update_elem = dev_map_hash_update_elem, +++ .map_delete_elem = dev_map_hash_delete_elem, +++ .map_check_btf = map_check_no_btf, +++}; +++ +++static void dev_map_hash_remove_netdev(struct bpf_dtab *dtab, +++ struct net_device *netdev) +++{ +++ unsigned long flags; +++ u32 i; +++ +++ spin_lock_irqsave(&dtab->index_lock, flags); +++ for (i = 0; i < dtab->n_buckets; i++) { +++ struct bpf_dtab_netdev *dev; +++ struct hlist_head *head; +++ struct hlist_node *next; +++ +++ head = dev_map_index_hash(dtab, i); +++ +++ hlist_for_each_entry_safe(dev, next, head, index_hlist) { +++ if (netdev != dev->dev) +++ continue; +++ +++ dtab->items--; +++ hlist_del_rcu(&dev->index_hlist); +++ call_rcu(&dev->rcu, __dev_map_entry_free); +++ } +++ } +++ spin_unlock_irqrestore(&dtab->index_lock, flags); +++} +++ +++static int dev_map_notification(struct notifier_block *notifier, +++ ulong event, void *ptr) +++{ +++ struct net_device *netdev = netdev_notifier_info_to_dev(ptr); +++ struct bpf_dtab *dtab; +++ int i; +++ +++ switch (event) { +++ case NETDEV_UNREGISTER: +++ /* This rcu_read_lock/unlock pair is needed 
because +++ * dev_map_list is an RCU list AND to ensure a delete +++ * operation does not free a netdev_map entry while we +++ * are comparing it against the netdev being unregistered. +++ */ +++ rcu_read_lock(); +++ list_for_each_entry_rcu(dtab, &dev_map_list, list) { +++ if (dtab->map.map_type == BPF_MAP_TYPE_DEVMAP_HASH) { +++ dev_map_hash_remove_netdev(dtab, netdev); +++ continue; +++ } +++ +++ for (i = 0; i < dtab->map.max_entries; i++) { +++ struct bpf_dtab_netdev *dev, *odev; +++ +++ dev = READ_ONCE(dtab->netdev_map[i]); +++ if (!dev || netdev != dev->dev) +++ continue; +++ odev = cmpxchg(&dtab->netdev_map[i], dev, NULL); +++ if (dev == odev) +++ call_rcu(&dev->rcu, +++ __dev_map_entry_free); +++ } +++ } +++ rcu_read_unlock(); +++ break; +++ default: +++ break; +++ } +++ return NOTIFY_OK; +++} +++ +++static struct notifier_block dev_map_notifier = { +++ .notifier_call = dev_map_notification, +++}; +++ +++static int __init dev_map_init(void) +++{ +++ /* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */ +++ BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) != +++ offsetof(struct _bpf_dtab_netdev, dev)); +++ register_netdevice_notifier(&dev_map_notifier); +++ return 0; +++} +++ +++subsys_initcall(dev_map_init); ++--- /dev/null +++++ b/kernel/bpf/disasm.c ++@@ -0,0 +1,258 @@ +++// SPDX-License-Identifier: GPL-2.0-only +++/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com +++ * Copyright (c) 2016 Facebook +++ */ +++ +++#include +++ +++#include "disasm.h" +++ +++#define __BPF_FUNC_STR_FN(x) [BPF_FUNC_ ## x] = __stringify(bpf_ ## x) +++static const char * const func_id_str[] = { +++ __BPF_FUNC_MAPPER(__BPF_FUNC_STR_FN) +++}; +++#undef __BPF_FUNC_STR_FN +++ +++static const char *__func_get_name(const struct bpf_insn_cbs *cbs, +++ const struct bpf_insn *insn, +++ char *buff, size_t len) +++{ +++ BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID); +++ +++ if (insn->src_reg != BPF_PSEUDO_CALL && +++ insn->imm >= 0 && insn->imm < __BPF_FUNC_MAX_ID && +++ func_id_str[insn->imm]) +++ return func_id_str[insn->imm]; +++ +++ if (cbs && cbs->cb_call) +++ return cbs->cb_call(cbs->private_data, insn); +++ +++ if (insn->src_reg == BPF_PSEUDO_CALL) +++ snprintf(buff, len, "%+d", insn->imm); +++ +++ return buff; +++} +++ +++static const char *__func_imm_name(const struct bpf_insn_cbs *cbs, +++ const struct bpf_insn *insn, +++ u64 full_imm, char *buff, size_t len) +++{ +++ if (cbs && cbs->cb_imm) +++ return cbs->cb_imm(cbs->private_data, insn, full_imm); +++ +++ snprintf(buff, len, "0x%llx", (unsigned long long)full_imm); +++ return buff; +++} +++ +++const char *func_id_name(int id) +++{ +++ if (id >= 0 && id < __BPF_FUNC_MAX_ID && func_id_str[id]) +++ return func_id_str[id]; +++ else +++ return "unknown"; +++} +++ +++const char *const bpf_class_string[8] = { +++ [BPF_LD] = "ld", +++ [BPF_LDX] = "ldx", +++ [BPF_ST] = "st", +++ [BPF_STX] = "stx", +++ [BPF_ALU] = "alu", +++ [BPF_JMP] = "jmp", +++ [BPF_JMP32] = "jmp32", +++ [BPF_ALU64] = "alu64", +++}; +++ +++const char *const bpf_alu_string[16] = { +++ [BPF_ADD >> 4] = "+=", +++ [BPF_SUB >> 4] = "-=", +++ [BPF_MUL >> 4] = "*=", +++ [BPF_DIV >> 4] = "/=", +++ [BPF_OR >> 4] = "|=", +++ [BPF_AND >> 4] = "&=", +++ [BPF_LSH >> 4] = "<<=", +++ [BPF_RSH >> 4] = ">>=", +++ [BPF_NEG >> 4] = "neg", +++ [BPF_MOD >> 4] = "%=", +++ [BPF_XOR >> 4] = "^=", +++ [BPF_MOV >> 4] = "=", +++ [BPF_ARSH >> 4] = "s>>=", +++ [BPF_END >> 4] = "endian", +++}; +++ +++static const char *const bpf_ldst_string[] = { +++ [BPF_W >> 3] = "u32", +++ [BPF_H >> 3] = 
"u16", +++ [BPF_B >> 3] = "u8", +++ [BPF_DW >> 3] = "u64", +++}; +++ +++static const char *const bpf_jmp_string[16] = { +++ [BPF_JA >> 4] = "jmp", +++ [BPF_JEQ >> 4] = "==", +++ [BPF_JGT >> 4] = ">", +++ [BPF_JLT >> 4] = "<", +++ [BPF_JGE >> 4] = ">=", +++ [BPF_JLE >> 4] = "<=", +++ [BPF_JSET >> 4] = "&", +++ [BPF_JNE >> 4] = "!=", +++ [BPF_JSGT >> 4] = "s>", +++ [BPF_JSLT >> 4] = "s<", +++ [BPF_JSGE >> 4] = "s>=", +++ [BPF_JSLE >> 4] = "s<=", +++ [BPF_CALL >> 4] = "call", +++ [BPF_EXIT >> 4] = "exit", +++}; +++ +++static void print_bpf_end_insn(bpf_insn_print_t verbose, +++ void *private_data, +++ const struct bpf_insn *insn) +++{ +++ verbose(private_data, "(%02x) r%d = %s%d r%d\n", +++ insn->code, insn->dst_reg, +++ BPF_SRC(insn->code) == BPF_TO_BE ? "be" : "le", +++ insn->imm, insn->dst_reg); +++} +++ +++void print_bpf_insn(const struct bpf_insn_cbs *cbs, +++ const struct bpf_insn *insn, +++ bool allow_ptr_leaks) +++{ +++ const bpf_insn_print_t verbose = cbs->cb_print; +++ u8 class = BPF_CLASS(insn->code); +++ +++ if (class == BPF_ALU || class == BPF_ALU64) { +++ if (BPF_OP(insn->code) == BPF_END) { +++ if (class == BPF_ALU64) +++ verbose(cbs->private_data, "BUG_alu64_%02x\n", insn->code); +++ else +++ print_bpf_end_insn(verbose, cbs->private_data, insn); +++ } else if (BPF_OP(insn->code) == BPF_NEG) { +++ verbose(cbs->private_data, "(%02x) %c%d = -%c%d\n", +++ insn->code, class == BPF_ALU ? 'w' : 'r', +++ insn->dst_reg, class == BPF_ALU ? 'w' : 'r', +++ insn->dst_reg); +++ } else if (BPF_SRC(insn->code) == BPF_X) { +++ verbose(cbs->private_data, "(%02x) %c%d %s %c%d\n", +++ insn->code, class == BPF_ALU ? 'w' : 'r', +++ insn->dst_reg, +++ bpf_alu_string[BPF_OP(insn->code) >> 4], +++ class == BPF_ALU ? 'w' : 'r', +++ insn->src_reg); +++ } else { +++ verbose(cbs->private_data, "(%02x) %c%d %s %d\n", +++ insn->code, class == BPF_ALU ? 
'w' : 'r', +++ insn->dst_reg, +++ bpf_alu_string[BPF_OP(insn->code) >> 4], +++ insn->imm); +++ } +++ } else if (class == BPF_STX) { +++ if (BPF_MODE(insn->code) == BPF_MEM) +++ verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = r%d\n", +++ insn->code, +++ bpf_ldst_string[BPF_SIZE(insn->code) >> 3], +++ insn->dst_reg, +++ insn->off, insn->src_reg); +++ else if (BPF_MODE(insn->code) == BPF_XADD) +++ verbose(cbs->private_data, "(%02x) lock *(%s *)(r%d %+d) += r%d\n", +++ insn->code, +++ bpf_ldst_string[BPF_SIZE(insn->code) >> 3], +++ insn->dst_reg, insn->off, +++ insn->src_reg); +++ else +++ verbose(cbs->private_data, "BUG_%02x\n", insn->code); +++ } else if (class == BPF_ST) { +++ if (BPF_MODE(insn->code) != BPF_MEM) { +++ verbose(cbs->private_data, "BUG_st_%02x\n", insn->code); +++ return; +++ } +++ verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n", +++ insn->code, +++ bpf_ldst_string[BPF_SIZE(insn->code) >> 3], +++ insn->dst_reg, +++ insn->off, insn->imm); +++ } else if (class == BPF_LDX) { +++ if (BPF_MODE(insn->code) != BPF_MEM) { +++ verbose(cbs->private_data, "BUG_ldx_%02x\n", insn->code); +++ return; +++ } +++ verbose(cbs->private_data, "(%02x) r%d = *(%s *)(r%d %+d)\n", +++ insn->code, insn->dst_reg, +++ bpf_ldst_string[BPF_SIZE(insn->code) >> 3], +++ insn->src_reg, insn->off); +++ } else if (class == BPF_LD) { +++ if (BPF_MODE(insn->code) == BPF_ABS) { +++ verbose(cbs->private_data, "(%02x) r0 = *(%s *)skb[%d]\n", +++ insn->code, +++ bpf_ldst_string[BPF_SIZE(insn->code) >> 3], +++ insn->imm); +++ } else if (BPF_MODE(insn->code) == BPF_IND) { +++ verbose(cbs->private_data, "(%02x) r0 = *(%s *)skb[r%d + %d]\n", +++ insn->code, +++ bpf_ldst_string[BPF_SIZE(insn->code) >> 3], +++ insn->src_reg, insn->imm); +++ } else if (BPF_MODE(insn->code) == BPF_IMM && +++ BPF_SIZE(insn->code) == BPF_DW) { +++ /* At this point, we already made sure that the second +++ * part of the ldimm64 insn is accessible. +++ */ +++ u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; +++ bool is_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD || +++ insn->src_reg == BPF_PSEUDO_MAP_VALUE; +++ char tmp[64]; +++ +++ if (is_ptr && !allow_ptr_leaks) +++ imm = 0; +++ +++ verbose(cbs->private_data, "(%02x) r%d = %s\n", +++ insn->code, insn->dst_reg, +++ __func_imm_name(cbs, insn, imm, +++ tmp, sizeof(tmp))); +++ } else { +++ verbose(cbs->private_data, "BUG_ld_%02x\n", insn->code); +++ return; +++ } +++ } else if (class == BPF_JMP32 || class == BPF_JMP) { +++ u8 opcode = BPF_OP(insn->code); +++ +++ if (opcode == BPF_CALL) { +++ char tmp[64]; +++ +++ if (insn->src_reg == BPF_PSEUDO_CALL) { +++ verbose(cbs->private_data, "(%02x) call pc%s\n", +++ insn->code, +++ __func_get_name(cbs, insn, +++ tmp, sizeof(tmp))); +++ } else { +++ strcpy(tmp, "unknown"); +++ verbose(cbs->private_data, "(%02x) call %s#%d\n", insn->code, +++ __func_get_name(cbs, insn, +++ tmp, sizeof(tmp)), +++ insn->imm); +++ } +++ } else if (insn->code == (BPF_JMP | BPF_JA)) { +++ verbose(cbs->private_data, "(%02x) goto pc%+d\n", +++ insn->code, insn->off); +++ } else if (insn->code == (BPF_JMP | BPF_EXIT)) { +++ verbose(cbs->private_data, "(%02x) exit\n", insn->code); +++ } else if (BPF_SRC(insn->code) == BPF_X) { +++ verbose(cbs->private_data, +++ "(%02x) if %c%d %s %c%d goto pc%+d\n", +++ insn->code, class == BPF_JMP32 ? 'w' : 'r', +++ insn->dst_reg, +++ bpf_jmp_string[BPF_OP(insn->code) >> 4], +++ class == BPF_JMP32 ? 
'w' : 'r', +++ insn->src_reg, insn->off); +++ } else { +++ verbose(cbs->private_data, +++ "(%02x) if %c%d %s 0x%x goto pc%+d\n", +++ insn->code, class == BPF_JMP32 ? 'w' : 'r', +++ insn->dst_reg, +++ bpf_jmp_string[BPF_OP(insn->code) >> 4], +++ insn->imm, insn->off); +++ } +++ } else { +++ verbose(cbs->private_data, "(%02x) %s\n", +++ insn->code, bpf_class_string[class]); +++ } +++} ++--- /dev/null +++++ b/kernel/bpf/disasm.h ++@@ -0,0 +1,40 @@ +++/* SPDX-License-Identifier: GPL-2.0-only */ +++/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com +++ * Copyright (c) 2016 Facebook +++ */ +++ +++#ifndef __BPF_DISASM_H__ +++#define __BPF_DISASM_H__ +++ +++#include +++#include +++#include +++#ifndef __KERNEL__ +++#include +++#include +++#endif +++ +++extern const char *const bpf_alu_string[16]; +++extern const char *const bpf_class_string[8]; +++ +++const char *func_id_name(int id); +++ +++typedef __printf(2, 3) void (*bpf_insn_print_t)(void *private_data, +++ const char *, ...); +++typedef const char *(*bpf_insn_revmap_call_t)(void *private_data, +++ const struct bpf_insn *insn); +++typedef const char *(*bpf_insn_print_imm_t)(void *private_data, +++ const struct bpf_insn *insn, +++ __u64 full_imm); +++ +++struct bpf_insn_cbs { +++ bpf_insn_print_t cb_print; +++ bpf_insn_revmap_call_t cb_call; +++ bpf_insn_print_imm_t cb_imm; +++ void *private_data; +++}; +++ +++void print_bpf_insn(const struct bpf_insn_cbs *cbs, +++ const struct bpf_insn *insn, +++ bool allow_ptr_leaks); +++#endif ++--- a/kernel/bpf/hashtab.c +++++ b/kernel/bpf/hashtab.c ++@@ -1,147 +1,467 @@ +++// SPDX-License-Identifier: GPL-2.0-only ++ /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com ++- * ++- * This program is free software; you can redistribute it and/or ++- * modify it under the terms of version 2 of the GNU General Public ++- * License as published by the Free Software Foundation. ++- * ++- * This program is distributed in the hope that it will be useful, but ++- * WITHOUT ANY WARRANTY; without even the implied warranty of ++- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++- * General Public License for more details. 
+++ * Copyright (c) 2016 Facebook ++ */ ++ #include +++#include ++ #include ++ #include ++-#include +++#include +++#include +++#include +++#include "percpu_freelist.h" +++#include "bpf_lru_list.h" +++#include "map_in_map.h" +++ +++#define HTAB_CREATE_FLAG_MASK \ +++ (BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \ +++ BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED) +++ +++struct bucket { +++ struct hlist_nulls_head head; +++ raw_spinlock_t lock; +++}; ++ ++ struct bpf_htab { ++ struct bpf_map map; ++- struct hlist_head *buckets; ++- raw_spinlock_t lock; ++- u32 count; /* number of elements in this hashtable */ +++ struct bucket *buckets; +++ void *elems; +++ union { +++ struct pcpu_freelist freelist; +++ struct bpf_lru lru; +++ }; +++ struct htab_elem *__percpu *extra_elems; +++ atomic_t count; /* number of elements in this hashtable */ ++ u32 n_buckets; /* number of hash buckets */ ++ u32 elem_size; /* size of each element in bytes */ +++ u32 hashrnd; ++ }; ++ ++ /* each htab element is struct htab_elem + key + value */ ++ struct htab_elem { ++- struct hlist_node hash_node; ++- struct rcu_head rcu; +++ union { +++ struct hlist_nulls_node hash_node; +++ struct { +++ void *padding; +++ union { +++ struct bpf_htab *htab; +++ struct pcpu_freelist_node fnode; +++ }; +++ }; +++ }; +++ union { +++ struct rcu_head rcu; +++ struct bpf_lru_node lru_node; +++ }; ++ u32 hash; ++ char key[0] __aligned(8); ++ }; ++ +++static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node); +++ +++static bool htab_is_lru(const struct bpf_htab *htab) +++{ +++ return htab->map.map_type == BPF_MAP_TYPE_LRU_HASH || +++ htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; +++} +++ +++static bool htab_is_percpu(const struct bpf_htab *htab) +++{ +++ return htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH || +++ htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; +++} +++ +++static bool htab_is_prealloc(const struct bpf_htab *htab) +++{ +++ return !(htab->map.map_flags & BPF_F_NO_PREALLOC); +++} +++ +++static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size, +++ void __percpu *pptr) +++{ +++ *(void __percpu **)(l->key + key_size) = pptr; +++} +++ +++static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size) +++{ +++ return *(void __percpu **)(l->key + key_size); +++} +++ +++static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l) +++{ +++ return *(void **)(l->key + roundup(map->key_size, 8)); +++} +++ +++static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i) +++{ +++ return (struct htab_elem *) (htab->elems + i * htab->elem_size); +++} +++ +++static void htab_free_elems(struct bpf_htab *htab) +++{ +++ int i; +++ +++ if (!htab_is_percpu(htab)) +++ goto free_elems; +++ +++ for (i = 0; i < htab->map.max_entries; i++) { +++ void __percpu *pptr; +++ +++ pptr = htab_elem_get_ptr(get_htab_elem(htab, i), +++ htab->map.key_size); +++ free_percpu(pptr); +++ cond_resched(); +++ } +++free_elems: +++ bpf_map_area_free(htab->elems); +++} +++ +++static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key, +++ u32 hash) +++{ +++ struct bpf_lru_node *node = bpf_lru_pop_free(&htab->lru, hash); +++ struct htab_elem *l; +++ +++ if (node) { +++ l = container_of(node, struct htab_elem, lru_node); +++ memcpy(l->key, key, htab->map.key_size); +++ return l; +++ } +++ +++ return NULL; +++} +++ +++static int prealloc_init(struct bpf_htab *htab) +++{ +++ u32 num_entries = htab->map.max_entries; +++ int err = -ENOMEM, i; +++ +++ if 
(!htab_is_percpu(htab) && !htab_is_lru(htab)) +++ num_entries += num_possible_cpus(); +++ +++ htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries, +++ htab->map.numa_node); +++ if (!htab->elems) +++ return -ENOMEM; +++ +++ if (!htab_is_percpu(htab)) +++ goto skip_percpu_elems; +++ +++ for (i = 0; i < num_entries; i++) { +++ u32 size = round_up(htab->map.value_size, 8); +++ void __percpu *pptr; +++ +++ pptr = __alloc_percpu_gfp(size, 8, GFP_USER | __GFP_NOWARN); +++ if (!pptr) +++ goto free_elems; +++ htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size, +++ pptr); +++ cond_resched(); +++ } +++ +++skip_percpu_elems: +++ if (htab_is_lru(htab)) +++ err = bpf_lru_init(&htab->lru, +++ htab->map.map_flags & BPF_F_NO_COMMON_LRU, +++ offsetof(struct htab_elem, hash) - +++ offsetof(struct htab_elem, lru_node), +++ htab_lru_map_delete_node, +++ htab); +++ else +++ err = pcpu_freelist_init(&htab->freelist); +++ +++ if (err) +++ goto free_elems; +++ +++ if (htab_is_lru(htab)) +++ bpf_lru_populate(&htab->lru, htab->elems, +++ offsetof(struct htab_elem, lru_node), +++ htab->elem_size, num_entries); +++ else +++ pcpu_freelist_populate(&htab->freelist, +++ htab->elems + offsetof(struct htab_elem, fnode), +++ htab->elem_size, num_entries); +++ +++ return 0; +++ +++free_elems: +++ htab_free_elems(htab); +++ return err; +++} +++ +++static void prealloc_destroy(struct bpf_htab *htab) +++{ +++ htab_free_elems(htab); +++ +++ if (htab_is_lru(htab)) +++ bpf_lru_destroy(&htab->lru); +++ else +++ pcpu_freelist_destroy(&htab->freelist); +++} +++ +++static int alloc_extra_elems(struct bpf_htab *htab) +++{ +++ struct htab_elem *__percpu *pptr, *l_new; +++ struct pcpu_freelist_node *l; +++ int cpu; +++ +++ pptr = __alloc_percpu_gfp(sizeof(struct htab_elem *), 8, +++ GFP_USER | __GFP_NOWARN); +++ if (!pptr) +++ return -ENOMEM; +++ +++ for_each_possible_cpu(cpu) { +++ l = pcpu_freelist_pop(&htab->freelist); +++ /* pop will succeed, since prealloc_init() +++ * preallocated extra num_possible_cpus elements +++ */ +++ l_new = container_of(l, struct htab_elem, fnode); +++ *per_cpu_ptr(pptr, cpu) = l_new; +++ } +++ htab->extra_elems = pptr; +++ return 0; +++} +++ ++ /* Called from syscall */ ++-static struct bpf_map *htab_map_alloc(union bpf_attr *attr) +++static int htab_map_alloc_check(union bpf_attr *attr) ++ { ++- struct bpf_htab *htab; ++- int err, i; +++ bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH || +++ attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); +++ bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH || +++ attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); +++ /* percpu_lru means each cpu has its own LRU list. +++ * it is different from BPF_MAP_TYPE_PERCPU_HASH where +++ * the map's value itself is percpu. percpu_lru has +++ * nothing to do with the map's value. +++ */ +++ bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU); +++ bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC); +++ bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED); +++ int numa_node = bpf_map_attr_numa_node(attr); +++ +++ BUILD_BUG_ON(offsetof(struct htab_elem, htab) != +++ offsetof(struct htab_elem, hash_node.pprev)); +++ BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) != +++ offsetof(struct htab_elem, hash_node.pprev)); +++ +++ if (lru && !capable(CAP_SYS_ADMIN)) +++ /* LRU implementation is much complicated than other +++ * maps. Hence, limit to CAP_SYS_ADMIN for now. 
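Seen from user space, the constraints enforced here and in the checks that follow determine which attribute combinations BPF_MAP_CREATE accepts for the hash map family. The helper below is only a sketch built on the raw bpf(2) syscall with illustrative names; the LRU and zero-seed variants additionally assume CAP_SYS_ADMIN, as required above.

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int create_hash(__u32 map_type, __u32 map_flags)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = map_type;
	attr.key_size    = sizeof(__u32);
	attr.value_size  = sizeof(__u64);
	attr.max_entries = 1024;
	attr.map_flags   = map_flags;
	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}

/* create_hash(BPF_MAP_TYPE_HASH, 0)                       - preallocated hash
 * create_hash(BPF_MAP_TYPE_HASH, BPF_F_NO_PREALLOC)       - elements allocated on update
 * create_hash(BPF_MAP_TYPE_LRU_HASH, 0)                   - ok; LRU maps must stay preallocated
 * create_hash(BPF_MAP_TYPE_LRU_HASH, BPF_F_NO_PREALLOC)   - rejected with -ENOTSUPP
 * create_hash(BPF_MAP_TYPE_LRU_HASH, BPF_F_NO_COMMON_LRU) - one LRU list per CPU
 */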
+++ */ +++ return -EPERM; ++ ++- htab = kzalloc(sizeof(*htab), GFP_USER); ++- if (!htab) ++- return ERR_PTR(-ENOMEM); +++ if (zero_seed && !capable(CAP_SYS_ADMIN)) +++ /* Guard against local DoS, and discourage production use. */ +++ return -EPERM; ++ ++- /* mandatory map attributes */ ++- htab->map.key_size = attr->key_size; ++- htab->map.value_size = attr->value_size; ++- htab->map.max_entries = attr->max_entries; +++ if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK || +++ !bpf_map_flags_access_ok(attr->map_flags)) +++ return -EINVAL; +++ +++ if (!lru && percpu_lru) +++ return -EINVAL; +++ +++ if (lru && !prealloc) +++ return -ENOTSUPP; +++ +++ if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru)) +++ return -EINVAL; ++ ++ /* check sanity of attributes. ++ * value_size == 0 may be allowed in the future to use map as a set ++ */ ++- err = -EINVAL; ++- if (htab->map.max_entries == 0 || htab->map.key_size == 0 || ++- htab->map.value_size == 0) ++- goto free_htab; ++- ++- /* hash table size must be power of 2 */ ++- htab->n_buckets = roundup_pow_of_two(htab->map.max_entries); +++ if (attr->max_entries == 0 || attr->key_size == 0 || +++ attr->value_size == 0) +++ return -EINVAL; ++ ++- err = -E2BIG; ++- if (htab->map.key_size > MAX_BPF_STACK) +++ if (attr->key_size > MAX_BPF_STACK) ++ /* eBPF programs initialize keys on stack, so they cannot be ++ * larger than max stack size ++ */ ++- goto free_htab; +++ return -E2BIG; ++ ++- if (htab->map.value_size >= (1 << (KMALLOC_SHIFT_MAX - 1)) - +++ if (attr->value_size >= KMALLOC_MAX_SIZE - ++ MAX_BPF_STACK - sizeof(struct htab_elem)) ++ /* if value_size is bigger, the user space won't be able to ++ * access the elements via bpf syscall. This check also makes ++ * sure that the elem_size doesn't overflow and it's ++ * kmalloc-able later in htab_map_update_elem() ++ */ ++- goto free_htab; +++ return -E2BIG; +++ +++ return 0; +++} +++ +++static struct bpf_map *htab_map_alloc(union bpf_attr *attr) +++{ +++ bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH || +++ attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); +++ bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH || +++ attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); +++ /* percpu_lru means each cpu has its own LRU list. +++ * it is different from BPF_MAP_TYPE_PERCPU_HASH where +++ * the map's value itself is percpu. percpu_lru has +++ * nothing to do with the map's value. +++ */ +++ bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU); +++ bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC); +++ struct bpf_htab *htab; +++ int err, i; +++ u64 cost; +++ +++ htab = kzalloc(sizeof(*htab), GFP_USER); +++ if (!htab) +++ return ERR_PTR(-ENOMEM); +++ +++ bpf_map_init_from_attr(&htab->map, attr); +++ +++ if (percpu_lru) { +++ /* ensure each CPU's lru list has >=1 elements. +++ * since we are at it, make each lru list has the same +++ * number of elements. 
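As a worked instance of the adjustment that follows this comment, assume for the sake of the example a machine with 6 possible CPUs and a requested max_entries of 1000. roundup_to()/rounddown_to() below are local stand-ins for the kernel's roundup()/rounddown() macros; the rounddown() fallback only matters when the roundup overflows the u32.

#include <stdio.h>

static unsigned int roundup_to(unsigned int x, unsigned int y)
{
	return ((x + y - 1) / y) * y;
}

static unsigned int rounddown_to(unsigned int x, unsigned int y)
{
	return (x / y) * y;
}

int main(void)
{
	/* 6 possible CPUs, requested max_entries = 1000 */
	printf("roundup:   %u\n", roundup_to(1000, 6));   /* 1002 -> 167 entries per CPU LRU list */
	printf("rounddown: %u\n", rounddown_to(1000, 6)); /* 996, used only on u32 overflow */
	return 0;
}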
+++ */ +++ htab->map.max_entries = roundup(attr->max_entries, +++ num_possible_cpus()); +++ if (htab->map.max_entries < attr->max_entries) +++ htab->map.max_entries = rounddown(attr->max_entries, +++ num_possible_cpus()); +++ } +++ +++ /* hash table size must be power of 2 */ +++ htab->n_buckets = roundup_pow_of_two(htab->map.max_entries); ++ ++ htab->elem_size = sizeof(struct htab_elem) + ++- round_up(htab->map.key_size, 8) + ++- htab->map.value_size; +++ round_up(htab->map.key_size, 8); +++ if (percpu) +++ htab->elem_size += sizeof(void *); +++ else +++ htab->elem_size += round_up(htab->map.value_size, 8); ++ +++ err = -E2BIG; ++ /* prevent zero size kmalloc and check for u32 overflow */ ++ if (htab->n_buckets == 0 || ++- htab->n_buckets > U32_MAX / sizeof(struct hlist_head)) +++ htab->n_buckets > U32_MAX / sizeof(struct bucket)) ++ goto free_htab; ++ ++- if ((u64) htab->n_buckets * sizeof(struct hlist_head) + ++- (u64) htab->elem_size * htab->map.max_entries >= ++- U32_MAX - PAGE_SIZE) ++- /* make sure page count doesn't overflow */ ++- goto free_htab; +++ cost = (u64) htab->n_buckets * sizeof(struct bucket) + +++ (u64) htab->elem_size * htab->map.max_entries; ++ ++- htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) + ++- htab->elem_size * htab->map.max_entries, ++- PAGE_SIZE) >> PAGE_SHIFT; +++ if (percpu) +++ cost += (u64) round_up(htab->map.value_size, 8) * +++ num_possible_cpus() * htab->map.max_entries; +++ else +++ cost += (u64) htab->elem_size * num_possible_cpus(); +++ +++ /* if map size is larger than memlock limit, reject it */ +++ err = bpf_map_charge_init(&htab->map.memory, cost); +++ if (err) +++ goto free_htab; ++ ++ err = -ENOMEM; ++- htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct hlist_head), ++- GFP_USER | __GFP_NOWARN); +++ htab->buckets = bpf_map_area_alloc(htab->n_buckets * +++ sizeof(struct bucket), +++ htab->map.numa_node); +++ if (!htab->buckets) +++ goto free_charge; +++ +++ if (htab->map.map_flags & BPF_F_ZERO_SEED) +++ htab->hashrnd = 0; +++ else +++ htab->hashrnd = get_random_int(); ++ ++- if (!htab->buckets) { ++- htab->buckets = vmalloc(htab->n_buckets * sizeof(struct hlist_head)); ++- if (!htab->buckets) ++- goto free_htab; +++ for (i = 0; i < htab->n_buckets; i++) { +++ INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i); +++ raw_spin_lock_init(&htab->buckets[i].lock); ++ } ++ ++- for (i = 0; i < htab->n_buckets; i++) ++- INIT_HLIST_HEAD(&htab->buckets[i]); ++- ++- raw_spin_lock_init(&htab->lock); ++- htab->count = 0; +++ if (prealloc) { +++ err = prealloc_init(htab); +++ if (err) +++ goto free_buckets; +++ +++ if (!percpu && !lru) { +++ /* lru itself can remove the least used element, so +++ * there is no need for an extra elem during map_update. 
+++ */ +++ err = alloc_extra_elems(htab); +++ if (err) +++ goto free_prealloc; +++ } +++ } ++ ++ return &htab->map; ++ +++free_prealloc: +++ prealloc_destroy(htab); +++free_buckets: +++ bpf_map_area_free(htab->buckets); +++free_charge: +++ bpf_map_charge_finish(&htab->map.memory); ++ free_htab: ++ kfree(htab); ++ return ERR_PTR(err); ++ } ++ ++-static inline u32 htab_map_hash(const void *key, u32 key_len) +++static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd) ++ { ++- return jhash(key, key_len, 0); +++ return jhash(key, key_len, hashrnd); ++ } ++ ++-static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash) +++static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash) ++ { ++ return &htab->buckets[hash & (htab->n_buckets - 1)]; ++ } ++ ++-static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash, +++static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash) +++{ +++ return &__select_bucket(htab, hash)->head; +++} +++ +++/* this lookup function can only be called with bucket lock taken */ +++static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash, ++ void *key, u32 key_size) ++ { +++ struct hlist_nulls_node *n; ++ struct htab_elem *l; ++ ++- hlist_for_each_entry_rcu(l, head, hash_node) +++ hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) ++ if (l->hash == hash && !memcmp(&l->key, key, key_size)) ++ return l; ++ ++ return NULL; ++ } ++ ++-/* Called from syscall or from eBPF program */ ++-static void *htab_map_lookup_elem(struct bpf_map *map, void *key) +++/* can be called without bucket lock. it will repeat the loop in +++ * the unlikely event when elements moved from one bucket into another +++ * while link list is being walked +++ */ +++static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head, +++ u32 hash, void *key, +++ u32 key_size, u32 n_buckets) +++{ +++ struct hlist_nulls_node *n; +++ struct htab_elem *l; +++ +++again: +++ hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) +++ if (l->hash == hash && !memcmp(&l->key, key, key_size)) +++ return l; +++ +++ if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1)))) +++ goto again; +++ +++ return NULL; +++} +++ +++/* Called from syscall or from eBPF program directly, so +++ * arguments have to match bpf_map_lookup_elem() exactly. +++ * The return value is adjusted by BPF instructions +++ * in htab_map_gen_lookup(). +++ */ +++static void *__htab_map_lookup_elem(struct bpf_map *map, void *key) ++ { ++ struct bpf_htab *htab = container_of(map, struct bpf_htab, map); ++- struct hlist_head *head; +++ struct hlist_nulls_head *head; ++ struct htab_elem *l; ++ u32 hash, key_size; ++ ++@@ -150,11 +470,18 @@ static void *htab_map_lookup_elem(struct ++ ++ key_size = map->key_size; ++ ++- hash = htab_map_hash(key, key_size); +++ hash = htab_map_hash(key, key_size, htab->hashrnd); ++ ++ head = select_bucket(htab, hash); ++ ++- l = lookup_elem_raw(head, hash, key, key_size); +++ l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); +++ +++ return l; +++} +++ +++static void *htab_map_lookup_elem(struct bpf_map *map, void *key) +++{ +++ struct htab_elem *l = __htab_map_lookup_elem(map, key); ++ ++ if (l) ++ return l->key + round_up(map->key_size, 8); ++@@ -162,33 +489,138 @@ static void *htab_map_lookup_elem(struct ++ return NULL; ++ } ++ +++/* inline bpf_map_lookup_elem() call. 
+++ * Instead of: +++ * bpf_prog +++ * bpf_map_lookup_elem +++ * map->ops->map_lookup_elem +++ * htab_map_lookup_elem +++ * __htab_map_lookup_elem +++ * do: +++ * bpf_prog +++ * __htab_map_lookup_elem +++ */ +++static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) +++{ +++ struct bpf_insn *insn = insn_buf; +++ const int ret = BPF_REG_0; +++ +++ BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, +++ (void *(*)(struct bpf_map *map, void *key))NULL)); +++ *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem)); +++ *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1); +++ *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, +++ offsetof(struct htab_elem, key) + +++ round_up(map->key_size, 8)); +++ return insn - insn_buf; +++} +++ +++static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map, +++ void *key, const bool mark) +++{ +++ struct htab_elem *l = __htab_map_lookup_elem(map, key); +++ +++ if (l) { +++ if (mark) +++ bpf_lru_node_set_ref(&l->lru_node); +++ return l->key + round_up(map->key_size, 8); +++ } +++ +++ return NULL; +++} +++ +++static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key) +++{ +++ return __htab_lru_map_lookup_elem(map, key, true); +++} +++ +++static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key) +++{ +++ return __htab_lru_map_lookup_elem(map, key, false); +++} +++ +++static u32 htab_lru_map_gen_lookup(struct bpf_map *map, +++ struct bpf_insn *insn_buf) +++{ +++ struct bpf_insn *insn = insn_buf; +++ const int ret = BPF_REG_0; +++ const int ref_reg = BPF_REG_1; +++ +++ BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, +++ (void *(*)(struct bpf_map *map, void *key))NULL)); +++ *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem)); +++ *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4); +++ *insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret, +++ offsetof(struct htab_elem, lru_node) + +++ offsetof(struct bpf_lru_node, ref)); +++ *insn++ = BPF_JMP_IMM(BPF_JNE, ref_reg, 0, 1); +++ *insn++ = BPF_ST_MEM(BPF_B, ret, +++ offsetof(struct htab_elem, lru_node) + +++ offsetof(struct bpf_lru_node, ref), +++ 1); +++ *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, +++ offsetof(struct htab_elem, key) + +++ round_up(map->key_size, 8)); +++ return insn - insn_buf; +++} +++ +++/* It is called from the bpf_lru_list when the LRU needs to delete +++ * older elements from the htab. 
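For orientation, the three instructions built by htab_map_gen_lookup() above are what a plain helper call in a program like the following may be rewritten into when the verifier can inline the lookup. The sketch is illustrative only; the map and program names are invented and the definitions assume libbpf's bpf_helpers.h.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1024);
	__type(key, __u32);
	__type(value, __u64);
} counters SEC(".maps");

SEC("xdp")
int count_packets(struct xdp_md *ctx)
{
	__u32 key = 0;
	__u64 *val;

	/* Conceptually: call __htab_map_lookup_elem(), test the result for
	 * NULL, then advance past the element header and key to reach the
	 * value, i.e. the sequence emitted by htab_map_gen_lookup().
	 */
	val = bpf_map_lookup_elem(&counters, &key);
	if (val)
		__sync_fetch_and_add(val, 1);

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";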
+++ */ +++static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) +++{ +++ struct bpf_htab *htab = (struct bpf_htab *)arg; +++ struct htab_elem *l = NULL, *tgt_l; +++ struct hlist_nulls_head *head; +++ struct hlist_nulls_node *n; +++ unsigned long flags; +++ struct bucket *b; +++ +++ tgt_l = container_of(node, struct htab_elem, lru_node); +++ b = __select_bucket(htab, tgt_l->hash); +++ head = &b->head; +++ +++ raw_spin_lock_irqsave(&b->lock, flags); +++ +++ hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) +++ if (l == tgt_l) { +++ hlist_nulls_del_rcu(&l->hash_node); +++ break; +++ } +++ +++ raw_spin_unlock_irqrestore(&b->lock, flags); +++ +++ return l == tgt_l; +++} +++ ++ /* Called from syscall */ ++ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) ++ { ++ struct bpf_htab *htab = container_of(map, struct bpf_htab, map); ++- struct hlist_head *head; +++ struct hlist_nulls_head *head; ++ struct htab_elem *l, *next_l; ++ u32 hash, key_size; ++- int i; +++ int i = 0; ++ ++ WARN_ON_ONCE(!rcu_read_lock_held()); ++ ++ key_size = map->key_size; ++ ++- hash = htab_map_hash(key, key_size); +++ if (!key) +++ goto find_first_elem; +++ +++ hash = htab_map_hash(key, key_size, htab->hashrnd); ++ ++ head = select_bucket(htab, hash); ++ ++ /* lookup the key */ ++- l = lookup_elem_raw(head, hash, key, key_size); +++ l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); ++ ++- if (!l) { ++- i = 0; +++ if (!l) ++ goto find_first_elem; ++- } ++ ++ /* key was found, get next key in the same bucket */ ++- next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)), +++ next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)), ++ struct htab_elem, hash_node); ++ ++ if (next_l) { ++@@ -207,7 +639,7 @@ find_first_elem: ++ head = select_bucket(htab, i); ++ ++ /* pick first element in the bucket */ ++- next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)), +++ next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)), ++ struct htab_elem, hash_node); ++ if (next_l) { ++ /* if it's not empty, just return it */ ++@@ -216,90 +648,491 @@ find_first_elem: ++ } ++ } ++ ++- /* itereated over all buckets and all elements */ +++ /* iterated over all buckets and all elements */ ++ return -ENOENT; ++ } ++ +++static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l) +++{ +++ if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) +++ free_percpu(htab_elem_get_ptr(l, htab->map.key_size)); +++ kfree(l); +++} +++ +++static void htab_elem_free_rcu(struct rcu_head *head) +++{ +++ struct htab_elem *l = container_of(head, struct htab_elem, rcu); +++ struct bpf_htab *htab = l->htab; +++ +++ htab_elem_free(htab, l); +++} +++ +++static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l) +++{ +++ struct bpf_map *map = &htab->map; +++ void *ptr; +++ +++ if (map->ops->map_fd_put_ptr) { +++ ptr = fd_htab_map_get_ptr(map, l); +++ map->ops->map_fd_put_ptr(ptr); +++ } +++} +++ +++static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) +++{ +++ htab_put_fd_value(htab, l); +++ +++ if (htab_is_prealloc(htab)) { +++ __pcpu_freelist_push(&htab->freelist, &l->fnode); +++ } else { +++ atomic_dec(&htab->count); +++ l->htab = htab; +++ call_rcu(&l->rcu, htab_elem_free_rcu); +++ } +++} +++ +++static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr, +++ void *value, bool onallcpus) +++{ +++ if (!onallcpus) { +++ /* copy true value_size bytes */ 
+++ memcpy(this_cpu_ptr(pptr), value, htab->map.value_size); +++ } else { +++ u32 size = round_up(htab->map.value_size, 8); +++ int off = 0, cpu; +++ +++ for_each_possible_cpu(cpu) { +++ bpf_long_memcpy(per_cpu_ptr(pptr, cpu), +++ value + off, size); +++ off += size; +++ } +++ } +++} +++ +++static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr, +++ void *value, bool onallcpus) +++{ +++ /* When using prealloc and not setting the initial value on all cpus, +++ * zero-fill element values for other cpus (just as what happens when +++ * not using prealloc). Otherwise, bpf program has no way to ensure +++ * known initial values for cpus other than current one +++ * (onallcpus=false always when coming from bpf prog). +++ */ +++ if (htab_is_prealloc(htab) && !onallcpus) { +++ u32 size = round_up(htab->map.value_size, 8); +++ int current_cpu = raw_smp_processor_id(); +++ int cpu; +++ +++ for_each_possible_cpu(cpu) { +++ if (cpu == current_cpu) +++ bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value, +++ size); +++ else +++ memset(per_cpu_ptr(pptr, cpu), 0, size); +++ } +++ } else { +++ pcpu_copy_value(htab, pptr, value, onallcpus); +++ } +++} +++ +++static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab) +++{ +++ return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS && +++ BITS_PER_LONG == 64; +++} +++ +++static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, +++ void *value, u32 key_size, u32 hash, +++ bool percpu, bool onallcpus, +++ struct htab_elem *old_elem) +++{ +++ u32 size = htab->map.value_size; +++ bool prealloc = htab_is_prealloc(htab); +++ struct htab_elem *l_new, **pl_new; +++ void __percpu *pptr; +++ +++ if (prealloc) { +++ if (old_elem) { +++ /* if we're updating the existing element, +++ * use per-cpu extra elems to avoid freelist_pop/push +++ */ +++ pl_new = this_cpu_ptr(htab->extra_elems); +++ l_new = *pl_new; +++ htab_put_fd_value(htab, old_elem); +++ *pl_new = old_elem; +++ } else { +++ struct pcpu_freelist_node *l; +++ +++ l = __pcpu_freelist_pop(&htab->freelist); +++ if (!l) +++ return ERR_PTR(-E2BIG); +++ l_new = container_of(l, struct htab_elem, fnode); +++ } +++ } else { +++ if (atomic_inc_return(&htab->count) > htab->map.max_entries) +++ if (!old_elem) { +++ /* when map is full and update() is replacing +++ * old element, it's ok to allocate, since +++ * old element will be freed immediately. 
+++ * Otherwise return an error +++ */ +++ l_new = ERR_PTR(-E2BIG); +++ goto dec_count; +++ } +++ l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN, +++ htab->map.numa_node); +++ if (!l_new) { +++ l_new = ERR_PTR(-ENOMEM); +++ goto dec_count; +++ } +++ check_and_init_map_lock(&htab->map, +++ l_new->key + round_up(key_size, 8)); +++ } +++ +++ memcpy(l_new->key, key, key_size); +++ if (percpu) { +++ size = round_up(size, 8); +++ if (prealloc) { +++ pptr = htab_elem_get_ptr(l_new, key_size); +++ } else { +++ /* alloc_percpu zero-fills */ +++ pptr = __alloc_percpu_gfp(size, 8, +++ GFP_ATOMIC | __GFP_NOWARN); +++ if (!pptr) { +++ kfree(l_new); +++ l_new = ERR_PTR(-ENOMEM); +++ goto dec_count; +++ } +++ } +++ +++ pcpu_init_value(htab, pptr, value, onallcpus); +++ +++ if (!prealloc) +++ htab_elem_set_ptr(l_new, key_size, pptr); +++ } else if (fd_htab_map_needs_adjust(htab)) { +++ size = round_up(size, 8); +++ memcpy(l_new->key + round_up(key_size, 8), value, size); +++ } else { +++ copy_map_value(&htab->map, +++ l_new->key + round_up(key_size, 8), +++ value); +++ } +++ +++ l_new->hash = hash; +++ return l_new; +++dec_count: +++ atomic_dec(&htab->count); +++ return l_new; +++} +++ +++static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old, +++ u64 map_flags) +++{ +++ if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST) +++ /* elem already exists */ +++ return -EEXIST; +++ +++ if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST) +++ /* elem doesn't exist, cannot update it */ +++ return -ENOENT; +++ +++ return 0; +++} +++ ++ /* Called from syscall or from eBPF program */ ++ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, ++ u64 map_flags) ++ { ++ struct bpf_htab *htab = container_of(map, struct bpf_htab, map); ++- struct htab_elem *l_new, *l_old; ++- struct hlist_head *head; +++ struct htab_elem *l_new = NULL, *l_old; +++ struct hlist_nulls_head *head; ++ unsigned long flags; ++- u32 key_size; +++ struct bucket *b; +++ u32 key_size, hash; ++ int ret; ++ ++- if (map_flags > BPF_EXIST) +++ if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST)) ++ /* unknown flags */ ++ return -EINVAL; ++ ++ WARN_ON_ONCE(!rcu_read_lock_held()); ++ ++- /* allocate new element outside of lock */ ++- l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN); ++- if (!l_new) ++- return -ENOMEM; ++- ++ key_size = map->key_size; ++ ++- memcpy(l_new->key, key, key_size); ++- memcpy(l_new->key + round_up(key_size, 8), value, map->value_size); +++ hash = htab_map_hash(key, key_size, htab->hashrnd); +++ +++ b = __select_bucket(htab, hash); +++ head = &b->head; ++ ++- l_new->hash = htab_map_hash(l_new->key, key_size); +++ if (unlikely(map_flags & BPF_F_LOCK)) { +++ if (unlikely(!map_value_has_spin_lock(map))) +++ return -EINVAL; +++ /* find an element without taking the bucket lock */ +++ l_old = lookup_nulls_elem_raw(head, hash, key, key_size, +++ htab->n_buckets); +++ ret = check_flags(htab, l_old, map_flags); +++ if (ret) +++ return ret; +++ if (l_old) { +++ /* grab the element lock and update value in place */ +++ copy_map_value_locked(map, +++ l_old->key + round_up(key_size, 8), +++ value, false); +++ return 0; +++ } +++ /* fall through, grab the bucket lock and lookup again. +++ * 99.9% chance that the element won't be found, +++ * but second lookup under lock has to be done. 
+++ */ +++ } ++ ++ /* bpf_map_update_elem() can be called in_irq() */ ++- raw_spin_lock_irqsave(&htab->lock, flags); +++ raw_spin_lock_irqsave(&b->lock, flags); ++ ++- head = select_bucket(htab, l_new->hash); +++ l_old = lookup_elem_raw(head, hash, key, key_size); ++ ++- l_old = lookup_elem_raw(head, l_new->hash, key, key_size); +++ ret = check_flags(htab, l_old, map_flags); +++ if (ret) +++ goto err; ++ ++- if (!l_old && unlikely(htab->count >= map->max_entries)) { ++- /* if elem with this 'key' doesn't exist and we've reached ++- * max_entries limit, fail insertion of new elem +++ if (unlikely(l_old && (map_flags & BPF_F_LOCK))) { +++ /* first lookup without the bucket lock didn't find the element, +++ * but second lookup with the bucket lock found it. +++ * This case is highly unlikely, but has to be dealt with: +++ * grab the element lock in addition to the bucket lock +++ * and update element in place ++ */ ++- ret = -E2BIG; +++ copy_map_value_locked(map, +++ l_old->key + round_up(key_size, 8), +++ value, false); +++ ret = 0; ++ goto err; ++ } ++ ++- if (l_old && map_flags == BPF_NOEXIST) { ++- /* elem already exists */ ++- ret = -EEXIST; +++ l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false, +++ l_old); +++ if (IS_ERR(l_new)) { +++ /* all pre-allocated elements are in use or memory exhausted */ +++ ret = PTR_ERR(l_new); ++ goto err; ++ } ++ ++- if (!l_old && map_flags == BPF_EXIST) { ++- /* elem doesn't exist, cannot update it */ ++- ret = -ENOENT; ++- goto err; +++ /* add new element to the head of the list, so that +++ * concurrent search will find it before old elem +++ */ +++ hlist_nulls_add_head_rcu(&l_new->hash_node, head); +++ if (l_old) { +++ hlist_nulls_del_rcu(&l_old->hash_node); +++ if (!htab_is_prealloc(htab)) +++ free_htab_elem(htab, l_old); ++ } +++ ret = 0; +++err: +++ raw_spin_unlock_irqrestore(&b->lock, flags); +++ return ret; +++} ++ ++- /* add new element to the head of the list, so that concurrent ++- * search will find it before old elem +++static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, +++ u64 map_flags) +++{ +++ struct bpf_htab *htab = container_of(map, struct bpf_htab, map); +++ struct htab_elem *l_new, *l_old = NULL; +++ struct hlist_nulls_head *head; +++ unsigned long flags; +++ struct bucket *b; +++ u32 key_size, hash; +++ int ret; +++ +++ if (unlikely(map_flags > BPF_EXIST)) +++ /* unknown flags */ +++ return -EINVAL; +++ +++ WARN_ON_ONCE(!rcu_read_lock_held()); +++ +++ key_size = map->key_size; +++ +++ hash = htab_map_hash(key, key_size, htab->hashrnd); +++ +++ b = __select_bucket(htab, hash); +++ head = &b->head; +++ +++ /* For LRU, we need to alloc before taking bucket's +++ * spinlock because getting free nodes from LRU may need +++ * to remove older elements from htab and this removal +++ * operation will need a bucket lock. 
++ */ ++- hlist_add_head_rcu(&l_new->hash_node, head); +++ l_new = prealloc_lru_pop(htab, key, hash); +++ if (!l_new) +++ return -ENOMEM; +++ memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size); +++ +++ /* bpf_map_update_elem() can be called in_irq() */ +++ raw_spin_lock_irqsave(&b->lock, flags); +++ +++ l_old = lookup_elem_raw(head, hash, key, key_size); +++ +++ ret = check_flags(htab, l_old, map_flags); +++ if (ret) +++ goto err; +++ +++ /* add new element to the head of the list, so that +++ * concurrent search will find it before old elem +++ */ +++ hlist_nulls_add_head_rcu(&l_new->hash_node, head); +++ if (l_old) { +++ bpf_lru_node_set_ref(&l_new->lru_node); +++ hlist_nulls_del_rcu(&l_old->hash_node); +++ } +++ ret = 0; +++ +++err: +++ raw_spin_unlock_irqrestore(&b->lock, flags); +++ +++ if (ret) +++ bpf_lru_push_free(&htab->lru, &l_new->lru_node); +++ else if (l_old) +++ bpf_lru_push_free(&htab->lru, &l_old->lru_node); +++ +++ return ret; +++} +++ +++static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, +++ void *value, u64 map_flags, +++ bool onallcpus) +++{ +++ struct bpf_htab *htab = container_of(map, struct bpf_htab, map); +++ struct htab_elem *l_new = NULL, *l_old; +++ struct hlist_nulls_head *head; +++ unsigned long flags; +++ struct bucket *b; +++ u32 key_size, hash; +++ int ret; +++ +++ if (unlikely(map_flags > BPF_EXIST)) +++ /* unknown flags */ +++ return -EINVAL; +++ +++ WARN_ON_ONCE(!rcu_read_lock_held()); +++ +++ key_size = map->key_size; +++ +++ hash = htab_map_hash(key, key_size, htab->hashrnd); +++ +++ b = __select_bucket(htab, hash); +++ head = &b->head; +++ +++ /* bpf_map_update_elem() can be called in_irq() */ +++ raw_spin_lock_irqsave(&b->lock, flags); +++ +++ l_old = lookup_elem_raw(head, hash, key, key_size); +++ +++ ret = check_flags(htab, l_old, map_flags); +++ if (ret) +++ goto err; +++ ++ if (l_old) { ++- hlist_del_rcu(&l_old->hash_node); ++- kfree_rcu(l_old, rcu); +++ /* per-cpu hash map can update value in-place */ +++ pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size), +++ value, onallcpus); ++ } else { ++- htab->count++; +++ l_new = alloc_htab_elem(htab, key, value, key_size, +++ hash, true, onallcpus, NULL); +++ if (IS_ERR(l_new)) { +++ ret = PTR_ERR(l_new); +++ goto err; +++ } +++ hlist_nulls_add_head_rcu(&l_new->hash_node, head); ++ } ++- raw_spin_unlock_irqrestore(&htab->lock, flags); +++ ret = 0; +++err: +++ raw_spin_unlock_irqrestore(&b->lock, flags); +++ return ret; +++} ++ ++- return 0; +++static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, +++ void *value, u64 map_flags, +++ bool onallcpus) +++{ +++ struct bpf_htab *htab = container_of(map, struct bpf_htab, map); +++ struct htab_elem *l_new = NULL, *l_old; +++ struct hlist_nulls_head *head; +++ unsigned long flags; +++ struct bucket *b; +++ u32 key_size, hash; +++ int ret; +++ +++ if (unlikely(map_flags > BPF_EXIST)) +++ /* unknown flags */ +++ return -EINVAL; +++ +++ WARN_ON_ONCE(!rcu_read_lock_held()); +++ +++ key_size = map->key_size; +++ +++ hash = htab_map_hash(key, key_size, htab->hashrnd); +++ +++ b = __select_bucket(htab, hash); +++ head = &b->head; +++ +++ /* For LRU, we need to alloc before taking bucket's +++ * spinlock because LRU's elem alloc may need +++ * to remove older elem from htab and this removal +++ * operation will need a bucket lock. 
+++ */ +++ if (map_flags != BPF_EXIST) { +++ l_new = prealloc_lru_pop(htab, key, hash); +++ if (!l_new) +++ return -ENOMEM; +++ } +++ +++ /* bpf_map_update_elem() can be called in_irq() */ +++ raw_spin_lock_irqsave(&b->lock, flags); +++ +++ l_old = lookup_elem_raw(head, hash, key, key_size); +++ +++ ret = check_flags(htab, l_old, map_flags); +++ if (ret) +++ goto err; +++ +++ if (l_old) { +++ bpf_lru_node_set_ref(&l_old->lru_node); +++ +++ /* per-cpu hash map can update value in-place */ +++ pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size), +++ value, onallcpus); +++ } else { +++ pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size), +++ value, onallcpus); +++ hlist_nulls_add_head_rcu(&l_new->hash_node, head); +++ l_new = NULL; +++ } +++ ret = 0; ++ err: ++- raw_spin_unlock_irqrestore(&htab->lock, flags); ++- kfree(l_new); +++ raw_spin_unlock_irqrestore(&b->lock, flags); +++ if (l_new) +++ bpf_lru_push_free(&htab->lru, &l_new->lru_node); ++ return ret; ++ } ++ +++static int htab_percpu_map_update_elem(struct bpf_map *map, void *key, +++ void *value, u64 map_flags) +++{ +++ return __htab_percpu_map_update_elem(map, key, value, map_flags, false); +++} +++ +++static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, +++ void *value, u64 map_flags) +++{ +++ return __htab_lru_percpu_map_update_elem(map, key, value, map_flags, +++ false); +++} +++ ++ /* Called from syscall or from eBPF program */ ++ static int htab_map_delete_elem(struct bpf_map *map, void *key) ++ { ++ struct bpf_htab *htab = container_of(map, struct bpf_htab, map); ++- struct hlist_head *head; +++ struct hlist_nulls_head *head; +++ struct bucket *b; ++ struct htab_elem *l; ++ unsigned long flags; ++ u32 hash, key_size; ++@@ -309,22 +1142,54 @@ static int htab_map_delete_elem(struct b ++ ++ key_size = map->key_size; ++ ++- hash = htab_map_hash(key, key_size); +++ hash = htab_map_hash(key, key_size, htab->hashrnd); +++ b = __select_bucket(htab, hash); +++ head = &b->head; ++ ++- raw_spin_lock_irqsave(&htab->lock, flags); +++ raw_spin_lock_irqsave(&b->lock, flags); ++ ++- head = select_bucket(htab, hash); +++ l = lookup_elem_raw(head, hash, key, key_size); +++ +++ if (l) { +++ hlist_nulls_del_rcu(&l->hash_node); +++ free_htab_elem(htab, l); +++ ret = 0; +++ } +++ +++ raw_spin_unlock_irqrestore(&b->lock, flags); +++ return ret; +++} +++ +++static int htab_lru_map_delete_elem(struct bpf_map *map, void *key) +++{ +++ struct bpf_htab *htab = container_of(map, struct bpf_htab, map); +++ struct hlist_nulls_head *head; +++ struct bucket *b; +++ struct htab_elem *l; +++ unsigned long flags; +++ u32 hash, key_size; +++ int ret = -ENOENT; +++ +++ WARN_ON_ONCE(!rcu_read_lock_held()); +++ +++ key_size = map->key_size; +++ +++ hash = htab_map_hash(key, key_size, htab->hashrnd); +++ b = __select_bucket(htab, hash); +++ head = &b->head; +++ +++ raw_spin_lock_irqsave(&b->lock, flags); ++ ++ l = lookup_elem_raw(head, hash, key, key_size); ++ ++ if (l) { ++- hlist_del_rcu(&l->hash_node); ++- htab->count--; ++- kfree_rcu(l, rcu); +++ hlist_nulls_del_rcu(&l->hash_node); ++ ret = 0; ++ } ++ ++- raw_spin_unlock_irqrestore(&htab->lock, flags); +++ raw_spin_unlock_irqrestore(&b->lock, flags); +++ if (l) +++ bpf_lru_push_free(&htab->lru, &l->lru_node); ++ return ret; ++ } ++ ++@@ -333,14 +1198,13 @@ static void delete_all_elements(struct b ++ int i; ++ ++ for (i = 0; i < htab->n_buckets; i++) { ++- struct hlist_head *head = select_bucket(htab, i); ++- struct hlist_node *n; +++ struct hlist_nulls_head *head = 
select_bucket(htab, i); +++ struct hlist_nulls_node *n; ++ struct htab_elem *l; ++ ++- hlist_for_each_entry_safe(l, n, head, hash_node) { ++- hlist_del_rcu(&l->hash_node); ++- htab->count--; ++- kfree(l); +++ hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { +++ hlist_nulls_del_rcu(&l->hash_node); +++ htab_elem_free(htab, l); ++ } ++ } ++ } ++@@ -357,31 +1221,320 @@ static void htab_map_free(struct bpf_map ++ */ ++ synchronize_rcu(); ++ ++- /* some of kfree_rcu() callbacks for elements of this map may not have ++- * executed. It's ok. Proceed to free residual elements and map itself +++ /* some of free_htab_elem() callbacks for elements of this map may +++ * not have executed. Wait for them. ++ */ ++- delete_all_elements(htab); ++- kvfree(htab->buckets); +++ rcu_barrier(); +++ if (!htab_is_prealloc(htab)) +++ delete_all_elements(htab); +++ else +++ prealloc_destroy(htab); +++ +++ free_percpu(htab->extra_elems); +++ bpf_map_area_free(htab->buckets); ++ kfree(htab); ++ } ++ ++-static const struct bpf_map_ops htab_ops = { +++static void htab_map_seq_show_elem(struct bpf_map *map, void *key, +++ struct seq_file *m) +++{ +++ void *value; +++ +++ rcu_read_lock(); +++ +++ value = htab_map_lookup_elem(map, key); +++ if (!value) { +++ rcu_read_unlock(); +++ return; +++ } +++ +++ btf_type_seq_show(map->btf, map->btf_key_type_id, key, m); +++ seq_puts(m, ": "); +++ btf_type_seq_show(map->btf, map->btf_value_type_id, value, m); +++ seq_puts(m, "\n"); +++ +++ rcu_read_unlock(); +++} +++ +++const struct bpf_map_ops htab_map_ops = { +++ .map_alloc_check = htab_map_alloc_check, ++ .map_alloc = htab_map_alloc, ++ .map_free = htab_map_free, ++ .map_get_next_key = htab_map_get_next_key, ++ .map_lookup_elem = htab_map_lookup_elem, ++ .map_update_elem = htab_map_update_elem, ++ .map_delete_elem = htab_map_delete_elem, +++ .map_gen_lookup = htab_map_gen_lookup, +++ .map_seq_show_elem = htab_map_seq_show_elem, ++ }; ++ ++-static struct bpf_map_type_list htab_type __read_mostly = { ++- .ops = &htab_ops, ++- .type = BPF_MAP_TYPE_HASH, +++const struct bpf_map_ops htab_lru_map_ops = { +++ .map_alloc_check = htab_map_alloc_check, +++ .map_alloc = htab_map_alloc, +++ .map_free = htab_map_free, +++ .map_get_next_key = htab_map_get_next_key, +++ .map_lookup_elem = htab_lru_map_lookup_elem, +++ .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys, +++ .map_update_elem = htab_lru_map_update_elem, +++ .map_delete_elem = htab_lru_map_delete_elem, +++ .map_gen_lookup = htab_lru_map_gen_lookup, +++ .map_seq_show_elem = htab_map_seq_show_elem, ++ }; ++ ++-static int __init register_htab_map(void) +++/* Called from eBPF program */ +++static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key) ++ { ++- bpf_register_map_type(&htab_type); ++- return 0; +++ struct htab_elem *l = __htab_map_lookup_elem(map, key); +++ +++ if (l) +++ return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size)); +++ else +++ return NULL; ++ } ++-late_initcall(register_htab_map); +++ +++static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key) +++{ +++ struct htab_elem *l = __htab_map_lookup_elem(map, key); +++ +++ if (l) { +++ bpf_lru_node_set_ref(&l->lru_node); +++ return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size)); +++ } +++ +++ return NULL; +++} +++ +++int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value) +++{ +++ struct htab_elem *l; +++ void __percpu *pptr; +++ int ret = -ENOENT; +++ int cpu, off = 0; +++ u32 size; +++ +++ /* per_cpu areas are zero-filled and bpf programs can only +++ * 
access 'value_size' of them, so copying rounded areas +++ * will not leak any kernel data +++ */ +++ size = round_up(map->value_size, 8); +++ rcu_read_lock(); +++ l = __htab_map_lookup_elem(map, key); +++ if (!l) +++ goto out; +++ /* We do not mark LRU map element here in order to not mess up +++ * eviction heuristics when user space does a map walk. +++ */ +++ pptr = htab_elem_get_ptr(l, map->key_size); +++ for_each_possible_cpu(cpu) { +++ bpf_long_memcpy(value + off, +++ per_cpu_ptr(pptr, cpu), size); +++ off += size; +++ } +++ ret = 0; +++out: +++ rcu_read_unlock(); +++ return ret; +++} +++ +++int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, +++ u64 map_flags) +++{ +++ struct bpf_htab *htab = container_of(map, struct bpf_htab, map); +++ int ret; +++ +++ rcu_read_lock(); +++ if (htab_is_lru(htab)) +++ ret = __htab_lru_percpu_map_update_elem(map, key, value, +++ map_flags, true); +++ else +++ ret = __htab_percpu_map_update_elem(map, key, value, map_flags, +++ true); +++ rcu_read_unlock(); +++ +++ return ret; +++} +++ +++static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key, +++ struct seq_file *m) +++{ +++ struct htab_elem *l; +++ void __percpu *pptr; +++ int cpu; +++ +++ rcu_read_lock(); +++ +++ l = __htab_map_lookup_elem(map, key); +++ if (!l) { +++ rcu_read_unlock(); +++ return; +++ } +++ +++ btf_type_seq_show(map->btf, map->btf_key_type_id, key, m); +++ seq_puts(m, ": {\n"); +++ pptr = htab_elem_get_ptr(l, map->key_size); +++ for_each_possible_cpu(cpu) { +++ seq_printf(m, "\tcpu%d: ", cpu); +++ btf_type_seq_show(map->btf, map->btf_value_type_id, +++ per_cpu_ptr(pptr, cpu), m); +++ seq_puts(m, "\n"); +++ } +++ seq_puts(m, "}\n"); +++ +++ rcu_read_unlock(); +++} +++ +++const struct bpf_map_ops htab_percpu_map_ops = { +++ .map_alloc_check = htab_map_alloc_check, +++ .map_alloc = htab_map_alloc, +++ .map_free = htab_map_free, +++ .map_get_next_key = htab_map_get_next_key, +++ .map_lookup_elem = htab_percpu_map_lookup_elem, +++ .map_update_elem = htab_percpu_map_update_elem, +++ .map_delete_elem = htab_map_delete_elem, +++ .map_seq_show_elem = htab_percpu_map_seq_show_elem, +++}; +++ +++const struct bpf_map_ops htab_lru_percpu_map_ops = { +++ .map_alloc_check = htab_map_alloc_check, +++ .map_alloc = htab_map_alloc, +++ .map_free = htab_map_free, +++ .map_get_next_key = htab_map_get_next_key, +++ .map_lookup_elem = htab_lru_percpu_map_lookup_elem, +++ .map_update_elem = htab_lru_percpu_map_update_elem, +++ .map_delete_elem = htab_lru_map_delete_elem, +++ .map_seq_show_elem = htab_percpu_map_seq_show_elem, +++}; +++ +++static int fd_htab_map_alloc_check(union bpf_attr *attr) +++{ +++ if (attr->value_size != sizeof(u32)) +++ return -EINVAL; +++ return htab_map_alloc_check(attr); +++} +++ +++static void fd_htab_map_free(struct bpf_map *map) +++{ +++ struct bpf_htab *htab = container_of(map, struct bpf_htab, map); +++ struct hlist_nulls_node *n; +++ struct hlist_nulls_head *head; +++ struct htab_elem *l; +++ int i; +++ +++ for (i = 0; i < htab->n_buckets; i++) { +++ head = select_bucket(htab, i); +++ +++ hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { +++ void *ptr = fd_htab_map_get_ptr(map, l); +++ +++ map->ops->map_fd_put_ptr(ptr); +++ } +++ } +++ +++ htab_map_free(map); +++} +++ +++/* only called from syscall */ +++int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value) +++{ +++ void **ptr; +++ int ret = 0; +++ +++ if (!map->ops->map_fd_sys_lookup_elem) +++ return -ENOTSUPP; +++ +++ rcu_read_lock(); +++ ptr = 
htab_map_lookup_elem(map, key); +++ if (ptr) +++ *value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr)); +++ else +++ ret = -ENOENT; +++ rcu_read_unlock(); +++ +++ return ret; +++} +++ +++/* only called from syscall */ +++int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, +++ void *key, void *value, u64 map_flags) +++{ +++ void *ptr; +++ int ret; +++ u32 ufd = *(u32 *)value; +++ +++ ptr = map->ops->map_fd_get_ptr(map, map_file, ufd); +++ if (IS_ERR(ptr)) +++ return PTR_ERR(ptr); +++ +++ ret = htab_map_update_elem(map, key, &ptr, map_flags); +++ if (ret) +++ map->ops->map_fd_put_ptr(ptr); +++ +++ return ret; +++} +++ +++static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr) +++{ +++ struct bpf_map *map, *inner_map_meta; +++ +++ inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd); +++ if (IS_ERR(inner_map_meta)) +++ return inner_map_meta; +++ +++ map = htab_map_alloc(attr); +++ if (IS_ERR(map)) { +++ bpf_map_meta_free(inner_map_meta); +++ return map; +++ } +++ +++ map->inner_map_meta = inner_map_meta; +++ +++ return map; +++} +++ +++static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key) +++{ +++ struct bpf_map **inner_map = htab_map_lookup_elem(map, key); +++ +++ if (!inner_map) +++ return NULL; +++ +++ return READ_ONCE(*inner_map); +++} +++ +++static u32 htab_of_map_gen_lookup(struct bpf_map *map, +++ struct bpf_insn *insn_buf) +++{ +++ struct bpf_insn *insn = insn_buf; +++ const int ret = BPF_REG_0; +++ +++ BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, +++ (void *(*)(struct bpf_map *map, void *key))NULL)); +++ *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem)); +++ *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2); +++ *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, +++ offsetof(struct htab_elem, key) + +++ round_up(map->key_size, 8)); +++ *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0); +++ +++ return insn - insn_buf; +++} +++ +++static void htab_of_map_free(struct bpf_map *map) +++{ +++ bpf_map_meta_free(map->inner_map_meta); +++ fd_htab_map_free(map); +++} +++ +++const struct bpf_map_ops htab_of_maps_map_ops = { +++ .map_alloc_check = fd_htab_map_alloc_check, +++ .map_alloc = htab_of_map_alloc, +++ .map_free = htab_of_map_free, +++ .map_get_next_key = htab_map_get_next_key, +++ .map_lookup_elem = htab_of_map_lookup_elem, +++ .map_delete_elem = htab_map_delete_elem, +++ .map_fd_get_ptr = bpf_map_fd_get_ptr, +++ .map_fd_put_ptr = bpf_map_fd_put_ptr, +++ .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, +++ .map_gen_lookup = htab_of_map_gen_lookup, +++ .map_check_btf = map_check_no_btf, +++}; ++--- a/kernel/bpf/helpers.c +++++ b/kernel/bpf/helpers.c ++@@ -1,21 +1,18 @@ +++// SPDX-License-Identifier: GPL-2.0-only ++ /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com ++- * ++- * This program is free software; you can redistribute it and/or ++- * modify it under the terms of version 2 of the GNU General Public ++- * License as published by the Free Software Foundation. ++- * ++- * This program is distributed in the hope that it will be useful, but ++- * WITHOUT ANY WARRANTY; without even the implied warranty of ++- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++- * General Public License for more details. 
++ */ ++ #include ++ #include ++ #include ++ #include +++#include ++ #include ++ #include ++ #include +++#include +++#include +++ +++#include "../../lib/kstrtox.h" ++ ++ /* If kernel subsystem is allowing eBPF programs to call this function, ++ * inside its own verifier_ops->get_func_proto() callback it should return ++@@ -26,48 +23,32 @@ ++ * if program is allowed to access maps, so check rcu_read_lock_held in ++ * all three functions. ++ */ ++-static u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +++BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key) ++ { ++- /* verifier checked that R1 contains a valid pointer to bpf_map ++- * and R2 points to a program stack and map->key_size bytes were ++- * initialized ++- */ ++- struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; ++- void *key = (void *) (unsigned long) r2; ++- void *value; ++- ++ WARN_ON_ONCE(!rcu_read_lock_held()); ++- ++- value = map->ops->map_lookup_elem(map, key); ++- ++- /* lookup() returns either pointer to element value or NULL ++- * which is the meaning of PTR_TO_MAP_VALUE_OR_NULL type ++- */ ++- return (unsigned long) value; +++ return (unsigned long) map->ops->map_lookup_elem(map, key); ++ } ++ ++ const struct bpf_func_proto bpf_map_lookup_elem_proto = { ++ .func = bpf_map_lookup_elem, ++ .gpl_only = false, +++ .pkt_access = true, ++ .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, ++ .arg1_type = ARG_CONST_MAP_PTR, ++ .arg2_type = ARG_PTR_TO_MAP_KEY, ++ }; ++ ++-static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +++BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key, +++ void *, value, u64, flags) ++ { ++- struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; ++- void *key = (void *) (unsigned long) r2; ++- void *value = (void *) (unsigned long) r3; ++- ++ WARN_ON_ONCE(!rcu_read_lock_held()); ++- ++- return map->ops->map_update_elem(map, key, value, r4); +++ return map->ops->map_update_elem(map, key, value, flags); ++ } ++ ++ const struct bpf_func_proto bpf_map_update_elem_proto = { ++ .func = bpf_map_update_elem, ++ .gpl_only = false, +++ .pkt_access = true, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_CONST_MAP_PTR, ++ .arg2_type = ARG_PTR_TO_MAP_KEY, ++@@ -75,33 +56,71 @@ const struct bpf_func_proto bpf_map_upda ++ .arg4_type = ARG_ANYTHING, ++ }; ++ ++-static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +++BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key) ++ { ++- struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; ++- void *key = (void *) (unsigned long) r2; ++- ++ WARN_ON_ONCE(!rcu_read_lock_held()); ++- ++ return map->ops->map_delete_elem(map, key); ++ } ++ ++ const struct bpf_func_proto bpf_map_delete_elem_proto = { ++ .func = bpf_map_delete_elem, ++ .gpl_only = false, +++ .pkt_access = true, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_CONST_MAP_PTR, ++ .arg2_type = ARG_PTR_TO_MAP_KEY, ++ }; ++ +++BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags) +++{ +++ return map->ops->map_push_elem(map, value, flags); +++} +++ +++const struct bpf_func_proto bpf_map_push_elem_proto = { +++ .func = bpf_map_push_elem, +++ .gpl_only = false, +++ .pkt_access = true, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_CONST_MAP_PTR, +++ .arg2_type = ARG_PTR_TO_MAP_VALUE, +++ .arg3_type = ARG_ANYTHING, +++}; +++ +++BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value) +++{ +++ return map->ops->map_pop_elem(map, value); +++} +++ +++const struct bpf_func_proto 
bpf_map_pop_elem_proto = { +++ .func = bpf_map_pop_elem, +++ .gpl_only = false, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_CONST_MAP_PTR, +++ .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, +++}; +++ +++BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value) +++{ +++ return map->ops->map_peek_elem(map, value); +++} +++ +++const struct bpf_func_proto bpf_map_peek_elem_proto = { +++ .func = bpf_map_peek_elem, +++ .gpl_only = false, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_CONST_MAP_PTR, +++ .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, +++}; +++ ++ const struct bpf_func_proto bpf_get_prandom_u32_proto = { ++ .func = bpf_user_rnd_u32, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ }; ++ ++-static u64 bpf_get_smp_processor_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +++BPF_CALL_0(bpf_get_smp_processor_id) ++ { ++- return raw_smp_processor_id(); +++ return smp_processor_id(); ++ } ++ ++ const struct bpf_func_proto bpf_get_smp_processor_id_proto = { ++@@ -110,7 +129,18 @@ const struct bpf_func_proto bpf_get_smp_ ++ .ret_type = RET_INTEGER, ++ }; ++ ++-static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +++BPF_CALL_0(bpf_get_numa_node_id) +++{ +++ return numa_node_id(); +++} +++ +++const struct bpf_func_proto bpf_get_numa_node_id_proto = { +++ .func = bpf_get_numa_node_id, +++ .gpl_only = false, +++ .ret_type = RET_INTEGER, +++}; +++ +++BPF_CALL_0(bpf_ktime_get_ns) ++ { ++ /* NMI safe access to clock monotonic */ ++ return ktime_get_mono_fast_ns(); ++@@ -122,11 +152,11 @@ const struct bpf_func_proto bpf_ktime_ge ++ .ret_type = RET_INTEGER, ++ }; ++ ++-static u64 bpf_get_current_pid_tgid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +++BPF_CALL_0(bpf_get_current_pid_tgid) ++ { ++ struct task_struct *task = current; ++ ++- if (!task) +++ if (unlikely(!task)) ++ return -EINVAL; ++ ++ return (u64) task->tgid << 32 | task->pid; ++@@ -138,18 +168,18 @@ const struct bpf_func_proto bpf_get_curr ++ .ret_type = RET_INTEGER, ++ }; ++ ++-static u64 bpf_get_current_uid_gid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +++BPF_CALL_0(bpf_get_current_uid_gid) ++ { ++ struct task_struct *task = current; ++ kuid_t uid; ++ kgid_t gid; ++ ++- if (!task) +++ if (unlikely(!task)) ++ return -EINVAL; ++ ++ current_uid_gid(&uid, &gid); ++ return (u64) from_kgid(&init_user_ns, gid) << 32 | ++- from_kuid(&init_user_ns, uid); +++ from_kuid(&init_user_ns, uid); ++ } ++ ++ const struct bpf_func_proto bpf_get_current_uid_gid_proto = { ++@@ -158,22 +188,254 @@ const struct bpf_func_proto bpf_get_curr ++ .ret_type = RET_INTEGER, ++ }; ++ ++-static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5) +++BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size) ++ { ++ struct task_struct *task = current; ++- char *buf = (char *) (long) r1; ++ ++- if (!task) ++- return -EINVAL; +++ if (unlikely(!task)) +++ goto err_clear; +++ +++ strncpy(buf, task->comm, size); ++ ++- strlcpy(buf, task->comm, min_t(size_t, size, sizeof(task->comm))); +++ /* Verifier guarantees that size > 0. For task->comm exceeding +++ * size, guarantee that buf is %NUL-terminated. Unconditionally +++ * done here to save the size test. 
+++ */ +++ buf[size - 1] = 0; ++ return 0; +++err_clear: +++ memset(buf, 0, size); +++ return -EINVAL; ++ } ++ ++ const struct bpf_func_proto bpf_get_current_comm_proto = { ++ .func = bpf_get_current_comm, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++- .arg1_type = ARG_PTR_TO_STACK, ++- .arg2_type = ARG_CONST_STACK_SIZE, +++ .arg1_type = ARG_PTR_TO_UNINIT_MEM, +++ .arg2_type = ARG_CONST_SIZE, +++}; +++ +++#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK) +++ +++static inline void __bpf_spin_lock(struct bpf_spin_lock *lock) +++{ +++ arch_spinlock_t *l = (void *)lock; +++ union { +++ __u32 val; +++ arch_spinlock_t lock; +++ } u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED }; +++ +++ compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0"); +++ BUILD_BUG_ON(sizeof(*l) != sizeof(__u32)); +++ BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32)); +++ arch_spin_lock(l); +++} +++ +++static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) +++{ +++ arch_spinlock_t *l = (void *)lock; +++ +++ arch_spin_unlock(l); +++} +++ +++#else +++ +++static inline void __bpf_spin_lock(struct bpf_spin_lock *lock) +++{ +++ atomic_t *l = (void *)lock; +++ +++ BUILD_BUG_ON(sizeof(*l) != sizeof(*lock)); +++ do { +++ smp_cond_load_relaxed(&l->counter, !VAL); +++ } while (atomic_xchg(l, 1)); +++} +++ +++static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock) +++{ +++ atomic_t *l = (void *)lock; +++ +++ atomic_set_release(l, 0); +++} +++ +++#endif +++ +++static DEFINE_PER_CPU(unsigned long, irqsave_flags); +++ +++notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) +++{ +++ unsigned long flags; +++ +++ local_irq_save(flags); +++ __bpf_spin_lock(lock); +++ __this_cpu_write(irqsave_flags, flags); +++ return 0; +++} +++ +++const struct bpf_func_proto bpf_spin_lock_proto = { +++ .func = bpf_spin_lock, +++ .gpl_only = false, +++ .ret_type = RET_VOID, +++ .arg1_type = ARG_PTR_TO_SPIN_LOCK, +++}; +++ +++notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) +++{ +++ unsigned long flags; +++ +++ flags = __this_cpu_read(irqsave_flags); +++ __bpf_spin_unlock(lock); +++ local_irq_restore(flags); +++ return 0; +++} +++ +++const struct bpf_func_proto bpf_spin_unlock_proto = { +++ .func = bpf_spin_unlock, +++ .gpl_only = false, +++ .ret_type = RET_VOID, +++ .arg1_type = ARG_PTR_TO_SPIN_LOCK, +++}; +++ +++void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, +++ bool lock_src) +++{ +++ struct bpf_spin_lock *lock; +++ +++ if (lock_src) +++ lock = src + map->spin_lock_off; +++ else +++ lock = dst + map->spin_lock_off; +++ preempt_disable(); +++ ____bpf_spin_lock(lock); +++ copy_map_value(map, dst, src); +++ ____bpf_spin_unlock(lock); +++ preempt_enable(); +++} +++ +++#define BPF_STRTOX_BASE_MASK 0x1F +++ +++static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags, +++ unsigned long long *res, bool *is_negative) +++{ +++ unsigned int base = flags & BPF_STRTOX_BASE_MASK; +++ const char *cur_buf = buf; +++ size_t cur_len = buf_len; +++ unsigned int consumed; +++ size_t val_len; +++ char str[64]; +++ +++ if (!buf || !buf_len || !res || !is_negative) +++ return -EINVAL; +++ +++ if (base != 0 && base != 8 && base != 10 && base != 16) +++ return -EINVAL; +++ +++ if (flags & ~BPF_STRTOX_BASE_MASK) +++ return -EINVAL; +++ +++ while (cur_buf < buf + buf_len && isspace(*cur_buf)) +++ ++cur_buf; +++ +++ *is_negative = (cur_buf < buf + buf_len && *cur_buf == '-'); +++ if (*is_negative) +++ ++cur_buf; +++ +++ consumed = cur_buf - buf; +++ cur_len -= consumed; 
+++ if (!cur_len) +++ return -EINVAL; +++ +++ cur_len = min(cur_len, sizeof(str) - 1); +++ memcpy(str, cur_buf, cur_len); +++ str[cur_len] = '\0'; +++ cur_buf = str; +++ +++ cur_buf = _parse_integer_fixup_radix(cur_buf, &base); +++ val_len = _parse_integer(cur_buf, base, res); +++ +++ if (val_len & KSTRTOX_OVERFLOW) +++ return -ERANGE; +++ +++ if (val_len == 0) +++ return -EINVAL; +++ +++ cur_buf += val_len; +++ consumed += cur_buf - str; +++ +++ return consumed; +++} +++ +++static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags, +++ long long *res) +++{ +++ unsigned long long _res; +++ bool is_negative; +++ int err; +++ +++ err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative); +++ if (err < 0) +++ return err; +++ if (is_negative) { +++ if ((long long)-_res > 0) +++ return -ERANGE; +++ *res = -_res; +++ } else { +++ if ((long long)_res < 0) +++ return -ERANGE; +++ *res = _res; +++ } +++ return err; +++} +++ +++BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags, +++ long *, res) +++{ +++ long long _res; +++ int err; +++ +++ err = __bpf_strtoll(buf, buf_len, flags, &_res); +++ if (err < 0) +++ return err; +++ if (_res != (long)_res) +++ return -ERANGE; +++ *res = _res; +++ return err; +++} +++ +++const struct bpf_func_proto bpf_strtol_proto = { +++ .func = bpf_strtol, +++ .gpl_only = false, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_PTR_TO_MEM, +++ .arg2_type = ARG_CONST_SIZE, +++ .arg3_type = ARG_ANYTHING, +++ .arg4_type = ARG_PTR_TO_LONG, +++}; +++ +++BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags, +++ unsigned long *, res) +++{ +++ unsigned long long _res; +++ bool is_negative; +++ int err; +++ +++ err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative); +++ if (err < 0) +++ return err; +++ if (is_negative) +++ return -EINVAL; +++ if (_res != (unsigned long)_res) +++ return -ERANGE; +++ *res = _res; +++ return err; +++} +++ +++const struct bpf_func_proto bpf_strtoul_proto = { +++ .func = bpf_strtoul, +++ .gpl_only = false, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_PTR_TO_MEM, +++ .arg2_type = ARG_CONST_SIZE, +++ .arg3_type = ARG_ANYTHING, +++ .arg4_type = ARG_PTR_TO_LONG, ++ }; ++--- a/kernel/bpf/inode.c +++++ b/kernel/bpf/inode.c ++@@ -1,3 +1,4 @@ +++// SPDX-License-Identifier: GPL-2.0-only ++ /* ++ * Minimal file system backend for holding eBPF maps and programs, ++ * used by bpf(2) object pinning. ++@@ -5,21 +6,19 @@ ++ * Authors: ++ * ++ * Daniel Borkmann ++- * ++- * This program is free software; you can redistribute it and/or ++- * modify it under the terms of the GNU General Public License ++- * version 2 as published by the Free Software Foundation. 
++ */ ++ ++-#include +++#include ++ #include ++ #include ++ #include ++ #include ++ #include ++ #include +++#include ++ #include ++ #include +++#include ++ ++ enum bpf_type { ++ BPF_TYPE_UNSPEC = 0, ++@@ -87,6 +86,7 @@ static struct inode *bpf_get_inode(struc ++ switch (mode & S_IFMT) { ++ case S_IFDIR: ++ case S_IFREG: +++ case S_IFLNK: ++ break; ++ default: ++ return ERR_PTR(-EINVAL); ++@@ -119,18 +119,20 @@ static int bpf_inode_type(const struct i ++ return 0; ++ } ++ ++-static bool bpf_dname_reserved(const struct dentry *dentry) +++static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode, +++ struct inode *dir) ++ { ++- return strchr(dentry->d_name.name, '.'); +++ d_instantiate(dentry, inode); +++ dget(dentry); +++ +++ dir->i_mtime = CURRENT_TIME; +++ dir->i_ctime = dir->i_mtime; ++ } ++ ++ static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) ++ { ++ struct inode *inode; ++ ++- if (bpf_dname_reserved(dentry)) ++- return -EPERM; ++- ++ inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR); ++ if (IS_ERR(inode)) ++ return PTR_ERR(inode); ++@@ -141,30 +143,30 @@ static int bpf_mkdir(struct inode *dir, ++ inc_nlink(inode); ++ inc_nlink(dir); ++ ++- d_instantiate(dentry, inode); ++- dget(dentry); ++- +++ bpf_dentry_finalize(dentry, inode, dir); ++ return 0; ++ } ++ ++-static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry, ++- umode_t mode, const struct inode_operations *iops) +++static int bpffs_obj_open(struct inode *inode, struct file *file) ++ { ++- struct inode *inode; +++ return -EIO; +++} ++ ++- if (bpf_dname_reserved(dentry)) ++- return -EPERM; +++static const struct file_operations bpffs_obj_fops = { +++ .open = bpffs_obj_open, +++}; ++ ++- inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFREG); +++static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry, +++ umode_t mode, const struct inode_operations *iops) +++{ +++ struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFREG); ++ if (IS_ERR(inode)) ++ return PTR_ERR(inode); ++ ++ inode->i_op = iops; ++ inode->i_private = dentry->d_fsdata; ++ ++- d_instantiate(dentry, inode); ++- dget(dentry); ++- +++ bpf_dentry_finalize(dentry, inode, dir); ++ return 0; ++ } ++ ++@@ -187,11 +189,48 @@ static int bpf_mkobj(struct inode *dir, ++ } ++ } ++ +++static struct dentry * +++bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) +++{ +++ /* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future +++ * extensions. 
+++ */ +++ if (strchr(dentry->d_name.name, '.')) +++ return ERR_PTR(-EPERM); +++ +++ return simple_lookup(dir, dentry, flags); +++} +++ +++static int bpf_symlink(struct inode *dir, struct dentry *dentry, +++ const char *target) +++{ +++ char *link = kstrdup(target, GFP_USER | __GFP_NOWARN); +++ struct inode *inode; +++ +++ if (!link) +++ return -ENOMEM; +++ +++ inode = bpf_get_inode(dir->i_sb, dir, S_IRWXUGO | S_IFLNK); +++ if (IS_ERR(inode)) { +++ kfree(link); +++ return PTR_ERR(inode); +++ } +++ +++ inode->i_op = &simple_symlink_inode_operations; +++ inode->i_link = link; +++ +++ bpf_dentry_finalize(dentry, inode, dir); +++ return 0; +++} +++ ++ static const struct inode_operations bpf_dir_iops = { ++- .lookup = simple_lookup, +++ .lookup = bpf_lookup, ++ .mknod = bpf_mkobj, ++ .mkdir = bpf_mkdir, +++ .symlink = bpf_symlink, ++ .rmdir = simple_rmdir, +++ .rename = simple_rename, +++ .link = simple_link, ++ .unlink = simple_unlink, ++ }; ++ ++@@ -256,7 +295,7 @@ out: ++ } ++ ++ static void *bpf_obj_do_get(const struct filename *pathname, ++- enum bpf_type *type) +++ enum bpf_type *type, int flags) ++ { ++ struct inode *inode; ++ struct path path; ++@@ -268,7 +307,7 @@ static void *bpf_obj_do_get(const struct ++ return ERR_PTR(ret); ++ ++ inode = d_backing_inode(path.dentry); ++- ret = inode_permission(inode, MAY_WRITE); +++ ret = inode_permission(inode, ACC_MODE(flags)); ++ if (ret) ++ goto out; ++ ++@@ -287,18 +326,23 @@ out: ++ return ERR_PTR(ret); ++ } ++ ++-int bpf_obj_get_user(const char __user *pathname) +++int bpf_obj_get_user(const char __user *pathname, int flags) ++ { ++ enum bpf_type type = BPF_TYPE_UNSPEC; ++ struct filename *pname; ++ int ret = -ENOENT; +++ int f_flags; ++ void *raw; ++ +++ f_flags = bpf_get_file_flag(flags); +++ if (f_flags < 0) +++ return f_flags; +++ ++ pname = getname(pathname); ++ if (IS_ERR(pname)) ++ return PTR_ERR(pname); ++ ++- raw = bpf_obj_do_get(pname, &type); +++ raw = bpf_obj_do_get(pname, &type, f_flags); ++ if (IS_ERR(raw)) { ++ ret = PTR_ERR(raw); ++ goto out; ++@@ -307,7 +351,7 @@ int bpf_obj_get_user(const char __user * ++ if (type == BPF_TYPE_PROG) ++ ret = bpf_prog_new_fd(raw); ++ else if (type == BPF_TYPE_MAP) ++- ret = bpf_map_new_fd(raw); +++ ret = bpf_map_new_fd(raw, f_flags); ++ else ++ goto out; ++ ++@@ -318,29 +362,131 @@ out: ++ return ret; ++ } ++ ++-static void bpf_evict_inode(struct inode *inode) +++static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type) ++ { ++- enum bpf_type type; +++ struct bpf_prog *prog; +++ int ret = inode_permission(inode, MAY_READ); +++ if (ret) +++ return ERR_PTR(ret); +++ +++ if (inode->i_op == &bpf_map_iops) +++ return ERR_PTR(-EINVAL); +++ if (inode->i_op != &bpf_prog_iops) +++ return ERR_PTR(-EACCES); ++ ++- truncate_inode_pages_final(&inode->i_data); ++- clear_inode(inode); +++ prog = inode->i_private; ++ +++ if (!bpf_prog_get_ok(prog, &type, false)) +++ return ERR_PTR(-EINVAL); +++ +++ return bpf_prog_inc(prog); +++} +++ +++struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type) +++{ +++ struct bpf_prog *prog; +++ struct path path; +++ int ret = kern_path(name, LOOKUP_FOLLOW, &path); +++ if (ret) +++ return ERR_PTR(ret); +++ prog = __get_prog_inode(d_backing_inode(path.dentry), type); +++ if (!IS_ERR(prog)) +++ touch_atime(&path); +++ path_put(&path); +++ return prog; +++} +++EXPORT_SYMBOL(bpf_prog_get_type_path); +++ +++/* +++ * Display the mount options in /proc/mounts. 
+++ */ +++static int bpf_show_options(struct seq_file *m, struct dentry *root) +++{ +++ umode_t mode = d_inode(root)->i_mode & S_IALLUGO & ~S_ISVTX; +++ +++ if (mode != S_IRWXUGO) +++ seq_printf(m, ",mode=%o", mode); +++ return 0; +++} +++ +++static void bpf_destroy_inode_deferred(struct rcu_head *head) +++{ +++ struct inode *inode = container_of(head, struct inode, i_rcu); +++ enum bpf_type type; +++ +++ if (S_ISLNK(inode->i_mode)) +++ kfree(inode->i_link); ++ if (!bpf_inode_type(inode, &type)) ++ bpf_any_put(inode->i_private, type); +++ free_inode_nonrcu(inode); +++} +++ +++static void bpf_destroy_inode(struct inode *inode) +++{ +++ call_rcu(&inode->i_rcu, bpf_destroy_inode_deferred); ++ } ++ ++ static const struct super_operations bpf_super_ops = { ++ .statfs = simple_statfs, ++ .drop_inode = generic_delete_inode, ++- .evict_inode = bpf_evict_inode, +++ .show_options = bpf_show_options, +++ .destroy_inode = bpf_destroy_inode, +++}; +++ +++enum { +++ OPT_MODE, +++ OPT_ERR, +++}; +++ +++static const match_table_t bpf_mount_tokens = { +++ { OPT_MODE, "mode=%o" }, +++ { OPT_ERR, NULL }, +++}; +++ +++struct bpf_mount_opts { +++ umode_t mode; ++ }; ++ +++static int bpf_parse_options(char *data, struct bpf_mount_opts *opts) +++{ +++ substring_t args[MAX_OPT_ARGS]; +++ int option, token; +++ char *ptr; +++ +++ opts->mode = S_IRWXUGO; +++ +++ while ((ptr = strsep(&data, ",")) != NULL) { +++ if (!*ptr) +++ continue; +++ +++ token = match_token(ptr, bpf_mount_tokens, args); +++ switch (token) { +++ case OPT_MODE: +++ if (match_octal(&args[0], &option)) +++ return -EINVAL; +++ opts->mode = option & S_IALLUGO; +++ break; +++ /* We might like to report bad mount options here, but +++ * traditionally we've ignored all mount options, so we'd +++ * better continue to ignore non-existing options for bpf. 
+++ */ +++ } +++ } +++ +++ return 0; +++} +++ ++ static int bpf_fill_super(struct super_block *sb, void *data, int silent) ++ { ++ static struct tree_descr bpf_rfiles[] = { { "" } }; +++ struct bpf_mount_opts opts; ++ struct inode *inode; ++ int ret; ++ +++ ret = bpf_parse_options(data, &opts); +++ if (ret) +++ return ret; +++ ++ ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); ++ if (ret) ++ return ret; ++@@ -350,7 +496,7 @@ static int bpf_fill_super(struct super_b ++ inode = sb->s_root->d_inode; ++ inode->i_op = &bpf_dir_iops; ++ inode->i_mode &= ~S_IALLUGO; ++- inode->i_mode |= S_ISVTX | S_IRWXUGO; +++ inode->i_mode |= S_ISVTX | opts.mode; ++ ++ return 0; ++ } ++@@ -368,8 +514,6 @@ static struct file_system_type bpf_fs_ty ++ .kill_sb = kill_litter_super, ++ }; ++ ++-MODULE_ALIAS_FS("bpf"); ++- ++ static int __init bpf_init(void) ++ { ++ int ret; ++--- /dev/null +++++ b/kernel/bpf/local_storage.c ++@@ -0,0 +1,600 @@ +++//SPDX-License-Identifier: GPL-2.0 +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++ +++DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); +++ +++#ifdef CONFIG_CGROUP_BPF +++ +++#define LOCAL_STORAGE_CREATE_FLAG_MASK \ +++ (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK) +++ +++struct bpf_cgroup_storage_map { +++ struct bpf_map map; +++ +++ spinlock_t lock; +++ struct bpf_prog *prog; +++ struct rb_root root; +++ struct list_head list; +++}; +++ +++static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map) +++{ +++ return container_of(map, struct bpf_cgroup_storage_map, map); +++} +++ +++static int bpf_cgroup_storage_key_cmp( +++ const struct bpf_cgroup_storage_key *key1, +++ const struct bpf_cgroup_storage_key *key2) +++{ +++ if (key1->cgroup_inode_id < key2->cgroup_inode_id) +++ return -1; +++ else if (key1->cgroup_inode_id > key2->cgroup_inode_id) +++ return 1; +++ else if (key1->attach_type < key2->attach_type) +++ return -1; +++ else if (key1->attach_type > key2->attach_type) +++ return 1; +++ return 0; +++} +++ +++static struct bpf_cgroup_storage *cgroup_storage_lookup( +++ struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key, +++ bool locked) +++{ +++ struct rb_root *root = &map->root; +++ struct rb_node *node; +++ +++ if (!locked) +++ spin_lock_bh(&map->lock); +++ +++ node = root->rb_node; +++ while (node) { +++ struct bpf_cgroup_storage *storage; +++ +++ storage = container_of(node, struct bpf_cgroup_storage, node); +++ +++ switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) { +++ case -1: +++ node = node->rb_left; +++ break; +++ case 1: +++ node = node->rb_right; +++ break; +++ default: +++ if (!locked) +++ spin_unlock_bh(&map->lock); +++ return storage; +++ } +++ } +++ +++ if (!locked) +++ spin_unlock_bh(&map->lock); +++ +++ return NULL; +++} +++ +++static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map, +++ struct bpf_cgroup_storage *storage) +++{ +++ struct rb_root *root = &map->root; +++ struct rb_node **new = &(root->rb_node), *parent = NULL; +++ +++ while (*new) { +++ struct bpf_cgroup_storage *this; +++ +++ this = container_of(*new, struct bpf_cgroup_storage, node); +++ +++ parent = *new; +++ switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) { +++ case -1: +++ new = &((*new)->rb_left); +++ break; +++ case 1: +++ new = &((*new)->rb_right); +++ break; +++ default: +++ return -EEXIST; +++ } +++ } +++ +++ rb_link_node(&storage->node, parent, new); +++ rb_insert_color(&storage->node, root); +++ +++ 
return 0; +++} +++ +++static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key) +++{ +++ struct bpf_cgroup_storage_map *map = map_to_storage(_map); +++ struct bpf_cgroup_storage_key *key = _key; +++ struct bpf_cgroup_storage *storage; +++ +++ storage = cgroup_storage_lookup(map, key, false); +++ if (!storage) +++ return NULL; +++ +++ return &READ_ONCE(storage->buf)->data[0]; +++} +++ +++static int cgroup_storage_update_elem(struct bpf_map *map, void *_key, +++ void *value, u64 flags) +++{ +++ struct bpf_cgroup_storage_key *key = _key; +++ struct bpf_cgroup_storage *storage; +++ struct bpf_storage_buffer *new; +++ +++ if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST | BPF_NOEXIST))) +++ return -EINVAL; +++ +++ if (unlikely(flags & BPF_NOEXIST)) +++ return -EINVAL; +++ +++ if (unlikely((flags & BPF_F_LOCK) && +++ !map_value_has_spin_lock(map))) +++ return -EINVAL; +++ +++ storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map, +++ key, false); +++ if (!storage) +++ return -ENOENT; +++ +++ if (flags & BPF_F_LOCK) { +++ copy_map_value_locked(map, storage->buf->data, value, false); +++ return 0; +++ } +++ +++ new = kmalloc_node(sizeof(struct bpf_storage_buffer) + +++ map->value_size, +++ __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN, +++ map->numa_node); +++ if (!new) +++ return -ENOMEM; +++ +++ memcpy(&new->data[0], value, map->value_size); +++ check_and_init_map_lock(map, new->data); +++ +++ new = xchg(&storage->buf, new); +++ kfree_rcu(new, rcu); +++ +++ return 0; +++} +++ +++int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key, +++ void *value) +++{ +++ struct bpf_cgroup_storage_map *map = map_to_storage(_map); +++ struct bpf_cgroup_storage_key *key = _key; +++ struct bpf_cgroup_storage *storage; +++ int cpu, off = 0; +++ u32 size; +++ +++ rcu_read_lock(); +++ storage = cgroup_storage_lookup(map, key, false); +++ if (!storage) { +++ rcu_read_unlock(); +++ return -ENOENT; +++ } +++ +++ /* per_cpu areas are zero-filled and bpf programs can only +++ * access 'value_size' of them, so copying rounded areas +++ * will not leak any kernel data +++ */ +++ size = round_up(_map->value_size, 8); +++ for_each_possible_cpu(cpu) { +++ bpf_long_memcpy(value + off, +++ per_cpu_ptr(storage->percpu_buf, cpu), size); +++ off += size; +++ } +++ rcu_read_unlock(); +++ return 0; +++} +++ +++int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key, +++ void *value, u64 map_flags) +++{ +++ struct bpf_cgroup_storage_map *map = map_to_storage(_map); +++ struct bpf_cgroup_storage_key *key = _key; +++ struct bpf_cgroup_storage *storage; +++ int cpu, off = 0; +++ u32 size; +++ +++ if (map_flags != BPF_ANY && map_flags != BPF_EXIST) +++ return -EINVAL; +++ +++ rcu_read_lock(); +++ storage = cgroup_storage_lookup(map, key, false); +++ if (!storage) { +++ rcu_read_unlock(); +++ return -ENOENT; +++ } +++ +++ /* the user space will provide round_up(value_size, 8) bytes that +++ * will be copied into per-cpu area. bpf programs can only access +++ * value_size of it. 
During lookup the same extra bytes will be +++ * returned or zeros which were zero-filled by percpu_alloc, +++ * so no kernel data leaks possible +++ */ +++ size = round_up(_map->value_size, 8); +++ for_each_possible_cpu(cpu) { +++ bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu), +++ value + off, size); +++ off += size; +++ } +++ rcu_read_unlock(); +++ return 0; +++} +++ +++static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key, +++ void *_next_key) +++{ +++ struct bpf_cgroup_storage_map *map = map_to_storage(_map); +++ struct bpf_cgroup_storage_key *key = _key; +++ struct bpf_cgroup_storage_key *next = _next_key; +++ struct bpf_cgroup_storage *storage; +++ +++ spin_lock_bh(&map->lock); +++ +++ if (list_empty(&map->list)) +++ goto enoent; +++ +++ if (key) { +++ storage = cgroup_storage_lookup(map, key, true); +++ if (!storage) +++ goto enoent; +++ +++ storage = list_next_entry(storage, list); +++ if (!storage) +++ goto enoent; +++ } else { +++ storage = list_first_entry(&map->list, +++ struct bpf_cgroup_storage, list); +++ } +++ +++ spin_unlock_bh(&map->lock); +++ next->attach_type = storage->key.attach_type; +++ next->cgroup_inode_id = storage->key.cgroup_inode_id; +++ return 0; +++ +++enoent: +++ spin_unlock_bh(&map->lock); +++ return -ENOENT; +++} +++ +++static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) +++{ +++ int numa_node = bpf_map_attr_numa_node(attr); +++ struct bpf_cgroup_storage_map *map; +++ struct bpf_map_memory mem; +++ int ret; +++ +++ if (attr->key_size != sizeof(struct bpf_cgroup_storage_key)) +++ return ERR_PTR(-EINVAL); +++ +++ if (attr->value_size == 0) +++ return ERR_PTR(-EINVAL); +++ +++ if (attr->value_size > PAGE_SIZE) +++ return ERR_PTR(-E2BIG); +++ +++ if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK || +++ !bpf_map_flags_access_ok(attr->map_flags)) +++ return ERR_PTR(-EINVAL); +++ +++ if (attr->max_entries) +++ /* max_entries is not used and enforced to be 0 */ +++ return ERR_PTR(-EINVAL); +++ +++ ret = bpf_map_charge_init(&mem, sizeof(struct bpf_cgroup_storage_map)); +++ if (ret < 0) +++ return ERR_PTR(ret); +++ +++ map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map), +++ __GFP_ZERO | GFP_USER, numa_node); +++ if (!map) { +++ bpf_map_charge_finish(&mem); +++ return ERR_PTR(-ENOMEM); +++ } +++ +++ bpf_map_charge_move(&map->map.memory, &mem); +++ +++ /* copy mandatory map attributes */ +++ bpf_map_init_from_attr(&map->map, attr); +++ +++ spin_lock_init(&map->lock); +++ map->root = RB_ROOT; +++ INIT_LIST_HEAD(&map->list); +++ +++ return &map->map; +++} +++ +++static void cgroup_storage_map_free(struct bpf_map *_map) +++{ +++ struct bpf_cgroup_storage_map *map = map_to_storage(_map); +++ +++ WARN_ON(!RB_EMPTY_ROOT(&map->root)); +++ WARN_ON(!list_empty(&map->list)); +++ +++ kfree(map); +++} +++ +++static int cgroup_storage_delete_elem(struct bpf_map *map, void *key) +++{ +++ return -EINVAL; +++} +++ +++static int cgroup_storage_check_btf(const struct bpf_map *map, +++ const struct btf *btf, +++ const struct btf_type *key_type, +++ const struct btf_type *value_type) +++{ +++ struct btf_member *m; +++ u32 offset, size; +++ +++ /* Key is expected to be of struct bpf_cgroup_storage_key type, +++ * which is: +++ * struct bpf_cgroup_storage_key { +++ * __u64 cgroup_inode_id; +++ * __u32 attach_type; +++ * }; +++ */ +++ +++ /* +++ * Key_type must be a structure with two fields. 
+++ */ +++ if (BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT || +++ BTF_INFO_VLEN(key_type->info) != 2) +++ return -EINVAL; +++ +++ /* +++ * The first field must be a 64 bit integer at 0 offset. +++ */ +++ m = (struct btf_member *)(key_type + 1); +++ size = FIELD_SIZEOF(struct bpf_cgroup_storage_key, cgroup_inode_id); +++ if (!btf_member_is_reg_int(btf, key_type, m, 0, size)) +++ return -EINVAL; +++ +++ /* +++ * The second field must be a 32 bit integer at 64 bit offset. +++ */ +++ m++; +++ offset = offsetof(struct bpf_cgroup_storage_key, attach_type); +++ size = FIELD_SIZEOF(struct bpf_cgroup_storage_key, attach_type); +++ if (!btf_member_is_reg_int(btf, key_type, m, offset, size)) +++ return -EINVAL; +++ +++ return 0; +++} +++ +++static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *_key, +++ struct seq_file *m) +++{ +++ enum bpf_cgroup_storage_type stype = cgroup_storage_type(map); +++ struct bpf_cgroup_storage_key *key = _key; +++ struct bpf_cgroup_storage *storage; +++ int cpu; +++ +++ rcu_read_lock(); +++ storage = cgroup_storage_lookup(map_to_storage(map), key, false); +++ if (!storage) { +++ rcu_read_unlock(); +++ return; +++ } +++ +++ btf_type_seq_show(map->btf, map->btf_key_type_id, key, m); +++ stype = cgroup_storage_type(map); +++ if (stype == BPF_CGROUP_STORAGE_SHARED) { +++ seq_puts(m, ": "); +++ btf_type_seq_show(map->btf, map->btf_value_type_id, +++ &READ_ONCE(storage->buf)->data[0], m); +++ seq_puts(m, "\n"); +++ } else { +++ seq_puts(m, ": {\n"); +++ for_each_possible_cpu(cpu) { +++ seq_printf(m, "\tcpu%d: ", cpu); +++ btf_type_seq_show(map->btf, map->btf_value_type_id, +++ per_cpu_ptr(storage->percpu_buf, cpu), +++ m); +++ seq_puts(m, "\n"); +++ } +++ seq_puts(m, "}\n"); +++ } +++ rcu_read_unlock(); +++} +++ +++const struct bpf_map_ops cgroup_storage_map_ops = { +++ .map_alloc = cgroup_storage_map_alloc, +++ .map_free = cgroup_storage_map_free, +++ .map_get_next_key = cgroup_storage_get_next_key, +++ .map_lookup_elem = cgroup_storage_lookup_elem, +++ .map_update_elem = cgroup_storage_update_elem, +++ .map_delete_elem = cgroup_storage_delete_elem, +++ .map_check_btf = cgroup_storage_check_btf, +++ .map_seq_show_elem = cgroup_storage_seq_show_elem, +++}; +++ +++int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map) +++{ +++ enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map); +++ struct bpf_cgroup_storage_map *map = map_to_storage(_map); +++ int ret = -EBUSY; +++ +++ spin_lock_bh(&map->lock); +++ +++ if (map->prog && map->prog != prog) +++ goto unlock; +++ if (prog->aux->cgroup_storage[stype] && +++ prog->aux->cgroup_storage[stype] != _map) +++ goto unlock; +++ +++ map->prog = prog; +++ prog->aux->cgroup_storage[stype] = _map; +++ ret = 0; +++unlock: +++ spin_unlock_bh(&map->lock); +++ +++ return ret; +++} +++ +++void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map) +++{ +++ enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map); +++ struct bpf_cgroup_storage_map *map = map_to_storage(_map); +++ +++ spin_lock_bh(&map->lock); +++ if (map->prog == prog) { +++ WARN_ON(prog->aux->cgroup_storage[stype] != _map); +++ map->prog = NULL; +++ prog->aux->cgroup_storage[stype] = NULL; +++ } +++ spin_unlock_bh(&map->lock); +++} +++ +++static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages) +++{ +++ size_t size; +++ +++ if (cgroup_storage_type(map) == BPF_CGROUP_STORAGE_SHARED) { +++ size = sizeof(struct bpf_storage_buffer) + map->value_size; +++ *pages = round_up(sizeof(struct 
bpf_cgroup_storage) + size, +++ PAGE_SIZE) >> PAGE_SHIFT; +++ } else { +++ size = map->value_size; +++ *pages = round_up(round_up(size, 8) * num_possible_cpus(), +++ PAGE_SIZE) >> PAGE_SHIFT; +++ } +++ +++ return size; +++} +++ +++struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog, +++ enum bpf_cgroup_storage_type stype) +++{ +++ struct bpf_cgroup_storage *storage; +++ struct bpf_map *map; +++ gfp_t flags; +++ size_t size; +++ u32 pages; +++ +++ map = prog->aux->cgroup_storage[stype]; +++ if (!map) +++ return NULL; +++ +++ size = bpf_cgroup_storage_calculate_size(map, &pages); +++ +++ if (bpf_map_charge_memlock(map, pages)) +++ return ERR_PTR(-EPERM); +++ +++ storage = kmalloc_node(sizeof(struct bpf_cgroup_storage), +++ __GFP_ZERO | GFP_USER, map->numa_node); +++ if (!storage) +++ goto enomem; +++ +++ flags = __GFP_ZERO | GFP_USER; +++ +++ if (stype == BPF_CGROUP_STORAGE_SHARED) { +++ storage->buf = kmalloc_node(size, flags, map->numa_node); +++ if (!storage->buf) +++ goto enomem; +++ check_and_init_map_lock(map, storage->buf->data); +++ } else { +++ storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags); +++ if (!storage->percpu_buf) +++ goto enomem; +++ } +++ +++ storage->map = (struct bpf_cgroup_storage_map *)map; +++ +++ return storage; +++ +++enomem: +++ bpf_map_uncharge_memlock(map, pages); +++ kfree(storage); +++ return ERR_PTR(-ENOMEM); +++} +++ +++static void free_shared_cgroup_storage_rcu(struct rcu_head *rcu) +++{ +++ struct bpf_cgroup_storage *storage = +++ container_of(rcu, struct bpf_cgroup_storage, rcu); +++ +++ kfree(storage->buf); +++ kfree(storage); +++} +++ +++static void free_percpu_cgroup_storage_rcu(struct rcu_head *rcu) +++{ +++ struct bpf_cgroup_storage *storage = +++ container_of(rcu, struct bpf_cgroup_storage, rcu); +++ +++ free_percpu(storage->percpu_buf); +++ kfree(storage); +++} +++ +++void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage) +++{ +++ enum bpf_cgroup_storage_type stype; +++ struct bpf_map *map; +++ u32 pages; +++ +++ if (!storage) +++ return; +++ +++ map = &storage->map->map; +++ +++ bpf_cgroup_storage_calculate_size(map, &pages); +++ bpf_map_uncharge_memlock(map, pages); +++ +++ stype = cgroup_storage_type(map); +++ if (stype == BPF_CGROUP_STORAGE_SHARED) +++ call_rcu(&storage->rcu, free_shared_cgroup_storage_rcu); +++ else +++ call_rcu(&storage->rcu, free_percpu_cgroup_storage_rcu); +++} +++ +++void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage, +++ struct cgroup *cgroup, +++ enum bpf_attach_type type) +++{ +++ struct bpf_cgroup_storage_map *map; +++ +++ if (!storage) +++ return; +++ +++ storage->key.attach_type = type; +++ storage->key.cgroup_inode_id = cgroup->kn->id.id; +++ +++ map = storage->map; +++ +++ spin_lock_bh(&map->lock); +++ WARN_ON(cgroup_storage_insert(map, storage)); +++ list_add(&storage->list, &map->list); +++ spin_unlock_bh(&map->lock); +++} +++ +++void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage) +++{ +++ struct bpf_cgroup_storage_map *map; +++ struct rb_root *root; +++ +++ if (!storage) +++ return; +++ +++ map = storage->map; +++ +++ spin_lock_bh(&map->lock); +++ root = &map->root; +++ rb_erase(&storage->node, root); +++ +++ list_del(&storage->list); +++ spin_unlock_bh(&map->lock); +++} +++ +++#endif ++--- /dev/null +++++ b/kernel/bpf/lpm_trie.c ++@@ -0,0 +1,746 @@ +++// SPDX-License-Identifier: GPL-2.0-only +++/* +++ * Longest prefix match list implementation +++ * +++ * Copyright (c) 2016,2017 Daniel Mack +++ * Copyright (c) 2016 David Herrmann +++ */ +++ 
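(Editor's sketch, not part of the patch.) For context on how the cgroup storage map implemented above is consumed: a BPF program declares a single BPF_MAP_TYPE_CGROUP_STORAGE map (max_entries stays 0, as cgroup_storage_map_alloc() enforces) and reaches its per-cgroup slot through the bpf_get_local_storage() helper. The snippet below is a minimal libbpf-style example; the map and program names are illustrative.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
	__type(key, struct bpf_cgroup_storage_key);
	__type(value, __u64);
} egress_cnt SEC(".maps");

SEC("cgroup_skb/egress")
int count_egress(struct __sk_buff *skb)
{
	__u64 *cnt = bpf_get_local_storage(&egress_cnt, 0);

	__sync_fetch_and_add(cnt, 1);
	return 1;	/* allow the packet */
}

char _license[] SEC("license") = "GPL";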
+++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++ +++/* Intermediate node */ +++#define LPM_TREE_NODE_FLAG_IM BIT(0) +++ +++struct lpm_trie_node; +++ +++struct lpm_trie_node { +++ struct rcu_head rcu; +++ struct lpm_trie_node __rcu *child[2]; +++ u32 prefixlen; +++ u32 flags; +++ u8 data[0]; +++}; +++ +++struct lpm_trie { +++ struct bpf_map map; +++ struct lpm_trie_node __rcu *root; +++ size_t n_entries; +++ size_t max_prefixlen; +++ size_t data_size; +++ raw_spinlock_t lock; +++}; +++ +++/* This trie implements a longest prefix match algorithm that can be used to +++ * match IP addresses to a stored set of ranges. +++ * +++ * Data stored in @data of struct bpf_lpm_key and struct lpm_trie_node is +++ * interpreted as big endian, so data[0] stores the most significant byte. +++ * +++ * Match ranges are internally stored in instances of struct lpm_trie_node +++ * which each contain their prefix length as well as two pointers that may +++ * lead to more nodes containing more specific matches. Each node also stores +++ * a value that is defined by and returned to userspace via the update_elem +++ * and lookup functions. +++ * +++ * For instance, let's start with a trie that was created with a prefix length +++ * of 32, so it can be used for IPv4 addresses, and one single element that +++ * matches 192.168.0.0/16. The data array would hence contain +++ * [0xc0, 0xa8, 0x00, 0x00] in big-endian notation. This documentation will +++ * stick to IP-address notation for readability though. +++ * +++ * As the trie is empty initially, the new node (1) will be places as root +++ * node, denoted as (R) in the example below. As there are no other node, both +++ * child pointers are %NULL. +++ * +++ * +----------------+ +++ * | (1) (R) | +++ * | 192.168.0.0/16 | +++ * | value: 1 | +++ * | [0] [1] | +++ * +----------------+ +++ * +++ * Next, let's add a new node (2) matching 192.168.0.0/24. As there is already +++ * a node with the same data and a smaller prefix (ie, a less specific one), +++ * node (2) will become a child of (1). In child index depends on the next bit +++ * that is outside of what (1) matches, and that bit is 0, so (2) will be +++ * child[0] of (1): +++ * +++ * +----------------+ +++ * | (1) (R) | +++ * | 192.168.0.0/16 | +++ * | value: 1 | +++ * | [0] [1] | +++ * +----------------+ +++ * | +++ * +----------------+ +++ * | (2) | +++ * | 192.168.0.0/24 | +++ * | value: 2 | +++ * | [0] [1] | +++ * +----------------+ +++ * +++ * The child[1] slot of (1) could be filled with another node which has bit #17 +++ * (the next bit after the ones that (1) matches on) set to 1. For instance, +++ * 192.168.128.0/24: +++ * +++ * +----------------+ +++ * | (1) (R) | +++ * | 192.168.0.0/16 | +++ * | value: 1 | +++ * | [0] [1] | +++ * +----------------+ +++ * | | +++ * +----------------+ +------------------+ +++ * | (2) | | (3) | +++ * | 192.168.0.0/24 | | 192.168.128.0/24 | +++ * | value: 2 | | value: 3 | +++ * | [0] [1] | | [0] [1] | +++ * +----------------+ +------------------+ +++ * +++ * Let's add another node (4) to the game for 192.168.1.0/24. In order to place +++ * it, node (1) is looked at first, and because (4) of the semantics laid out +++ * above (bit #17 is 0), it would normally be attached to (1) as child[0]. +++ * However, that slot is already allocated, so a new node is needed in between. +++ * That node does not have a value attached to it and it will never be +++ * returned to users as result of a lookup. 
It is only there to differentiate +++ * the traversal further. It will get a prefix as wide as necessary to +++ * distinguish its two children: +++ * +++ * +----------------+ +++ * | (1) (R) | +++ * | 192.168.0.0/16 | +++ * | value: 1 | +++ * | [0] [1] | +++ * +----------------+ +++ * | | +++ * +----------------+ +------------------+ +++ * | (4) (I) | | (3) | +++ * | 192.168.0.0/23 | | 192.168.128.0/24 | +++ * | value: --- | | value: 3 | +++ * | [0] [1] | | [0] [1] | +++ * +----------------+ +------------------+ +++ * | | +++ * +----------------+ +----------------+ +++ * | (2) | | (5) | +++ * | 192.168.0.0/24 | | 192.168.1.0/24 | +++ * | value: 2 | | value: 5 | +++ * | [0] [1] | | [0] [1] | +++ * +----------------+ +----------------+ +++ * +++ * 192.168.1.1/32 would be a child of (5) etc. +++ * +++ * An intermediate node will be turned into a 'real' node on demand. In the +++ * example above, (4) would be re-used if 192.168.0.0/23 is added to the trie. +++ * +++ * A fully populated trie would have a height of 32 nodes, as the trie was +++ * created with a prefix length of 32. +++ * +++ * The lookup starts at the root node. If the current node matches and if there +++ * is a child that can be used to become more specific, the trie is traversed +++ * downwards. The last node in the traversal that is a non-intermediate one is +++ * returned. +++ */ +++ +++static inline int extract_bit(const u8 *data, size_t index) +++{ +++ return !!(data[index / 8] & (1 << (7 - (index % 8)))); +++} +++ +++/** +++ * longest_prefix_match() - determine the longest prefix +++ * @trie: The trie to get internal sizes from +++ * @node: The node to operate on +++ * @key: The key to compare to @node +++ * +++ * Determine the longest prefix of @node that matches the bits in @key. +++ */ +++static size_t longest_prefix_match(const struct lpm_trie *trie, +++ const struct lpm_trie_node *node, +++ const struct bpf_lpm_trie_key *key) +++{ +++ u32 limit = min(node->prefixlen, key->prefixlen); +++ u32 prefixlen = 0, i = 0; +++ +++ BUILD_BUG_ON(offsetof(struct lpm_trie_node, data) % sizeof(u32)); +++ BUILD_BUG_ON(offsetof(struct bpf_lpm_trie_key, data) % sizeof(u32)); +++ +++#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(CONFIG_64BIT) +++ +++ /* data_size >= 16 has very small probability. +++ * We do not use a loop for optimal code generation. 
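(Editor's sketch, not from the patch.) From the syscall side, the trie semantics documented above look roughly like this, assuming libbpf's legacy bpf_create_map() wrapper and root privileges (trie_alloc() requires CAP_SYS_ADMIN and BPF_F_NO_PREALLOC):

#include <arpa/inet.h>
#include <stdio.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

struct ipv4_lpm_key {
	__u32 prefixlen;		/* struct bpf_lpm_trie_key header */
	__u8 data[4];			/* IPv4 address, big endian */
};

int main(void)
{
	struct ipv4_lpm_key key = { .prefixlen = 16 };
	__u64 value = 1;
	int fd;

	fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, sizeof(key), sizeof(value),
			    255, BPF_F_NO_PREALLOC);
	if (fd < 0)
		return 1;

	/* Insert 192.168.0.0/16, the root node (1) of the example above. */
	inet_pton(AF_INET, "192.168.0.0", key.data);
	bpf_map_update_elem(fd, &key, &value, BPF_ANY);

	/* A /32 lookup key returns the longest matching stored prefix. */
	key.prefixlen = 32;
	inet_pton(AF_INET, "192.168.1.1", key.data);
	if (!bpf_map_lookup_elem(fd, &key, &value))
		printf("matched, value=%llu\n", (unsigned long long)value);

	return 0;
}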
+++ */ +++ if (trie->data_size >= 8) { +++ u64 diff = be64_to_cpu(*(__be64 *)node->data ^ +++ *(__be64 *)key->data); +++ +++ prefixlen = 64 - fls64(diff); +++ if (prefixlen >= limit) +++ return limit; +++ if (diff) +++ return prefixlen; +++ i = 8; +++ } +++#endif +++ +++ while (trie->data_size >= i + 4) { +++ u32 diff = be32_to_cpu(*(__be32 *)&node->data[i] ^ +++ *(__be32 *)&key->data[i]); +++ +++ prefixlen += 32 - fls(diff); +++ if (prefixlen >= limit) +++ return limit; +++ if (diff) +++ return prefixlen; +++ i += 4; +++ } +++ +++ if (trie->data_size >= i + 2) { +++ u16 diff = be16_to_cpu(*(__be16 *)&node->data[i] ^ +++ *(__be16 *)&key->data[i]); +++ +++ prefixlen += 16 - fls(diff); +++ if (prefixlen >= limit) +++ return limit; +++ if (diff) +++ return prefixlen; +++ i += 2; +++ } +++ +++ if (trie->data_size >= i + 1) { +++ prefixlen += 8 - fls(node->data[i] ^ key->data[i]); +++ +++ if (prefixlen >= limit) +++ return limit; +++ } +++ +++ return prefixlen; +++} +++ +++/* Called from syscall or from eBPF program */ +++static void *trie_lookup_elem(struct bpf_map *map, void *_key) +++{ +++ struct lpm_trie *trie = container_of(map, struct lpm_trie, map); +++ struct lpm_trie_node *node, *found = NULL; +++ struct bpf_lpm_trie_key *key = _key; +++ +++ /* Start walking the trie from the root node ... */ +++ +++ for (node = rcu_dereference(trie->root); node;) { +++ unsigned int next_bit; +++ size_t matchlen; +++ +++ /* Determine the longest prefix of @node that matches @key. +++ * If it's the maximum possible prefix for this trie, we have +++ * an exact match and can return it directly. +++ */ +++ matchlen = longest_prefix_match(trie, node, key); +++ if (matchlen == trie->max_prefixlen) { +++ found = node; +++ break; +++ } +++ +++ /* If the number of bits that match is smaller than the prefix +++ * length of @node, bail out and return the node we have seen +++ * last in the traversal (ie, the parent). +++ */ +++ if (matchlen < node->prefixlen) +++ break; +++ +++ /* Consider this node as return candidate unless it is an +++ * artificially added intermediate one. +++ */ +++ if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) +++ found = node; +++ +++ /* If the node match is fully satisfied, let's see if we can +++ * become more specific. Determine the next bit in the key and +++ * traverse down. 
+++ */ +++ next_bit = extract_bit(key->data, node->prefixlen); +++ node = rcu_dereference(node->child[next_bit]); +++ } +++ +++ if (!found) +++ return NULL; +++ +++ return found->data + trie->data_size; +++} +++ +++static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie, +++ const void *value) +++{ +++ struct lpm_trie_node *node; +++ size_t size = sizeof(struct lpm_trie_node) + trie->data_size; +++ +++ if (value) +++ size += trie->map.value_size; +++ +++ node = kmalloc_node(size, GFP_ATOMIC | __GFP_NOWARN, +++ trie->map.numa_node); +++ if (!node) +++ return NULL; +++ +++ node->flags = 0; +++ +++ if (value) +++ memcpy(node->data + trie->data_size, value, +++ trie->map.value_size); +++ +++ return node; +++} +++ +++/* Called from syscall or from eBPF program */ +++static int trie_update_elem(struct bpf_map *map, +++ void *_key, void *value, u64 flags) +++{ +++ struct lpm_trie *trie = container_of(map, struct lpm_trie, map); +++ struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL; +++ struct lpm_trie_node __rcu **slot; +++ struct bpf_lpm_trie_key *key = _key; +++ unsigned long irq_flags; +++ unsigned int next_bit; +++ size_t matchlen = 0; +++ int ret = 0; +++ +++ if (unlikely(flags > BPF_EXIST)) +++ return -EINVAL; +++ +++ if (key->prefixlen > trie->max_prefixlen) +++ return -EINVAL; +++ +++ raw_spin_lock_irqsave(&trie->lock, irq_flags); +++ +++ /* Allocate and fill a new node */ +++ +++ if (trie->n_entries == trie->map.max_entries) { +++ ret = -ENOSPC; +++ goto out; +++ } +++ +++ new_node = lpm_trie_node_alloc(trie, value); +++ if (!new_node) { +++ ret = -ENOMEM; +++ goto out; +++ } +++ +++ trie->n_entries++; +++ +++ new_node->prefixlen = key->prefixlen; +++ RCU_INIT_POINTER(new_node->child[0], NULL); +++ RCU_INIT_POINTER(new_node->child[1], NULL); +++ memcpy(new_node->data, key->data, trie->data_size); +++ +++ /* Now find a slot to attach the new node. To do that, walk the tree +++ * from the root and match as many bits as possible for each node until +++ * we either find an empty slot or a slot that needs to be replaced by +++ * an intermediate node. +++ */ +++ slot = &trie->root; +++ +++ while ((node = rcu_dereference_protected(*slot, +++ lockdep_is_held(&trie->lock)))) { +++ matchlen = longest_prefix_match(trie, node, key); +++ +++ if (node->prefixlen != matchlen || +++ node->prefixlen == key->prefixlen || +++ node->prefixlen == trie->max_prefixlen) +++ break; +++ +++ next_bit = extract_bit(key->data, node->prefixlen); +++ slot = &node->child[next_bit]; +++ } +++ +++ /* If the slot is empty (a free child pointer or an empty root), +++ * simply assign the @new_node to that slot and be done. +++ */ +++ if (!node) { +++ rcu_assign_pointer(*slot, new_node); +++ goto out; +++ } +++ +++ /* If the slot we picked already exists, replace it with @new_node +++ * which already has the correct data array set. +++ */ +++ if (node->prefixlen == matchlen) { +++ new_node->child[0] = node->child[0]; +++ new_node->child[1] = node->child[1]; +++ +++ if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) +++ trie->n_entries--; +++ +++ rcu_assign_pointer(*slot, new_node); +++ kfree_rcu(node, rcu); +++ +++ goto out; +++ } +++ +++ /* If the new node matches the prefix completely, it must be inserted +++ * as an ancestor. Simply insert it between @node and *@slot. 
+++ */ +++ if (matchlen == key->prefixlen) { +++ next_bit = extract_bit(node->data, matchlen); +++ rcu_assign_pointer(new_node->child[next_bit], node); +++ rcu_assign_pointer(*slot, new_node); +++ goto out; +++ } +++ +++ im_node = lpm_trie_node_alloc(trie, NULL); +++ if (!im_node) { +++ ret = -ENOMEM; +++ goto out; +++ } +++ +++ im_node->prefixlen = matchlen; +++ im_node->flags |= LPM_TREE_NODE_FLAG_IM; +++ memcpy(im_node->data, node->data, trie->data_size); +++ +++ /* Now determine which child to install in which slot */ +++ if (extract_bit(key->data, matchlen)) { +++ rcu_assign_pointer(im_node->child[0], node); +++ rcu_assign_pointer(im_node->child[1], new_node); +++ } else { +++ rcu_assign_pointer(im_node->child[0], new_node); +++ rcu_assign_pointer(im_node->child[1], node); +++ } +++ +++ /* Finally, assign the intermediate node to the determined spot */ +++ rcu_assign_pointer(*slot, im_node); +++ +++out: +++ if (ret) { +++ if (new_node) +++ trie->n_entries--; +++ +++ kfree(new_node); +++ kfree(im_node); +++ } +++ +++ raw_spin_unlock_irqrestore(&trie->lock, irq_flags); +++ +++ return ret; +++} +++ +++/* Called from syscall or from eBPF program */ +++static int trie_delete_elem(struct bpf_map *map, void *_key) +++{ +++ struct lpm_trie *trie = container_of(map, struct lpm_trie, map); +++ struct bpf_lpm_trie_key *key = _key; +++ struct lpm_trie_node __rcu **trim, **trim2; +++ struct lpm_trie_node *node, *parent; +++ unsigned long irq_flags; +++ unsigned int next_bit; +++ size_t matchlen = 0; +++ int ret = 0; +++ +++ if (key->prefixlen > trie->max_prefixlen) +++ return -EINVAL; +++ +++ raw_spin_lock_irqsave(&trie->lock, irq_flags); +++ +++ /* Walk the tree looking for an exact key/length match and keeping +++ * track of the path we traverse. We will need to know the node +++ * we wish to delete, and the slot that points to the node we want +++ * to delete. We may also need to know the nodes parent and the +++ * slot that contains it. +++ */ +++ trim = &trie->root; +++ trim2 = trim; +++ parent = NULL; +++ while ((node = rcu_dereference_protected( +++ *trim, lockdep_is_held(&trie->lock)))) { +++ matchlen = longest_prefix_match(trie, node, key); +++ +++ if (node->prefixlen != matchlen || +++ node->prefixlen == key->prefixlen) +++ break; +++ +++ parent = node; +++ trim2 = trim; +++ next_bit = extract_bit(key->data, node->prefixlen); +++ trim = &node->child[next_bit]; +++ } +++ +++ if (!node || node->prefixlen != key->prefixlen || +++ node->prefixlen != matchlen || +++ (node->flags & LPM_TREE_NODE_FLAG_IM)) { +++ ret = -ENOENT; +++ goto out; +++ } +++ +++ trie->n_entries--; +++ +++ /* If the node we are removing has two children, simply mark it +++ * as intermediate and we are done. +++ */ +++ if (rcu_access_pointer(node->child[0]) && +++ rcu_access_pointer(node->child[1])) { +++ node->flags |= LPM_TREE_NODE_FLAG_IM; +++ goto out; +++ } +++ +++ /* If the parent of the node we are about to delete is an intermediate +++ * node, and the deleted node doesn't have any children, we can delete +++ * the intermediate parent as well and promote its other child +++ * up the tree. Doing this maintains the invariant that all +++ * intermediate nodes have exactly 2 children and that there are no +++ * unnecessary intermediate nodes in the tree. 
+++ */ +++ if (parent && (parent->flags & LPM_TREE_NODE_FLAG_IM) && +++ !node->child[0] && !node->child[1]) { +++ if (node == rcu_access_pointer(parent->child[0])) +++ rcu_assign_pointer( +++ *trim2, rcu_access_pointer(parent->child[1])); +++ else +++ rcu_assign_pointer( +++ *trim2, rcu_access_pointer(parent->child[0])); +++ kfree_rcu(parent, rcu); +++ kfree_rcu(node, rcu); +++ goto out; +++ } +++ +++ /* The node we are removing has either zero or one child. If there +++ * is a child, move it into the removed node's slot then delete +++ * the node. Otherwise just clear the slot and delete the node. +++ */ +++ if (node->child[0]) +++ rcu_assign_pointer(*trim, rcu_access_pointer(node->child[0])); +++ else if (node->child[1]) +++ rcu_assign_pointer(*trim, rcu_access_pointer(node->child[1])); +++ else +++ RCU_INIT_POINTER(*trim, NULL); +++ kfree_rcu(node, rcu); +++ +++out: +++ raw_spin_unlock_irqrestore(&trie->lock, irq_flags); +++ +++ return ret; +++} +++ +++#define LPM_DATA_SIZE_MAX 256 +++#define LPM_DATA_SIZE_MIN 1 +++ +++#define LPM_VAL_SIZE_MAX (KMALLOC_MAX_SIZE - LPM_DATA_SIZE_MAX - \ +++ sizeof(struct lpm_trie_node)) +++#define LPM_VAL_SIZE_MIN 1 +++ +++#define LPM_KEY_SIZE(X) (sizeof(struct bpf_lpm_trie_key) + (X)) +++#define LPM_KEY_SIZE_MAX LPM_KEY_SIZE(LPM_DATA_SIZE_MAX) +++#define LPM_KEY_SIZE_MIN LPM_KEY_SIZE(LPM_DATA_SIZE_MIN) +++ +++#define LPM_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_NUMA_NODE | \ +++ BPF_F_ACCESS_MASK) +++ +++static struct bpf_map *trie_alloc(union bpf_attr *attr) +++{ +++ struct lpm_trie *trie; +++ u64 cost = sizeof(*trie), cost_per_node; +++ int ret; +++ +++ if (!capable(CAP_SYS_ADMIN)) +++ return ERR_PTR(-EPERM); +++ +++ /* check sanity of attributes */ +++ if (attr->max_entries == 0 || +++ !(attr->map_flags & BPF_F_NO_PREALLOC) || +++ attr->map_flags & ~LPM_CREATE_FLAG_MASK || +++ !bpf_map_flags_access_ok(attr->map_flags) || +++ attr->key_size < LPM_KEY_SIZE_MIN || +++ attr->key_size > LPM_KEY_SIZE_MAX || +++ attr->value_size < LPM_VAL_SIZE_MIN || +++ attr->value_size > LPM_VAL_SIZE_MAX) +++ return ERR_PTR(-EINVAL); +++ +++ trie = kzalloc(sizeof(*trie), GFP_USER | __GFP_NOWARN); +++ if (!trie) +++ return ERR_PTR(-ENOMEM); +++ +++ /* copy mandatory map attributes */ +++ bpf_map_init_from_attr(&trie->map, attr); +++ trie->data_size = attr->key_size - +++ offsetof(struct bpf_lpm_trie_key, data); +++ trie->max_prefixlen = trie->data_size * 8; +++ +++ cost_per_node = sizeof(struct lpm_trie_node) + +++ attr->value_size + trie->data_size; +++ cost += (u64) attr->max_entries * cost_per_node; +++ +++ ret = bpf_map_charge_init(&trie->map.memory, cost); +++ if (ret) +++ goto out_err; +++ +++ raw_spin_lock_init(&trie->lock); +++ +++ return &trie->map; +++out_err: +++ kfree(trie); +++ return ERR_PTR(ret); +++} +++ +++static void trie_free(struct bpf_map *map) +++{ +++ struct lpm_trie *trie = container_of(map, struct lpm_trie, map); +++ struct lpm_trie_node __rcu **slot; +++ struct lpm_trie_node *node; +++ +++ /* Wait for outstanding programs to complete +++ * update/lookup/delete/get_next_key and free the trie. +++ */ +++ synchronize_rcu(); +++ +++ /* Always start at the root and walk down to a node that has no +++ * children. Then free that node, nullify its reference in the parent +++ * and start over. 
+++ */ +++ +++ for (;;) { +++ slot = &trie->root; +++ +++ for (;;) { +++ node = rcu_dereference_protected(*slot, 1); +++ if (!node) +++ goto out; +++ +++ if (rcu_access_pointer(node->child[0])) { +++ slot = &node->child[0]; +++ continue; +++ } +++ +++ if (rcu_access_pointer(node->child[1])) { +++ slot = &node->child[1]; +++ continue; +++ } +++ +++ kfree(node); +++ RCU_INIT_POINTER(*slot, NULL); +++ break; +++ } +++ } +++ +++out: +++ kfree(trie); +++} +++ +++static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) +++{ +++ struct lpm_trie_node *node, *next_node = NULL, *parent, *search_root; +++ struct lpm_trie *trie = container_of(map, struct lpm_trie, map); +++ struct bpf_lpm_trie_key *key = _key, *next_key = _next_key; +++ struct lpm_trie_node **node_stack = NULL; +++ int err = 0, stack_ptr = -1; +++ unsigned int next_bit; +++ size_t matchlen; +++ +++ /* The get_next_key follows postorder. For the 4 node example in +++ * the top of this file, the trie_get_next_key() returns the following +++ * one after another: +++ * 192.168.0.0/24 +++ * 192.168.1.0/24 +++ * 192.168.128.0/24 +++ * 192.168.0.0/16 +++ * +++ * The idea is to return more specific keys before less specific ones. +++ */ +++ +++ /* Empty trie */ +++ search_root = rcu_dereference(trie->root); +++ if (!search_root) +++ return -ENOENT; +++ +++ /* For invalid key, find the leftmost node in the trie */ +++ if (!key || key->prefixlen > trie->max_prefixlen) +++ goto find_leftmost; +++ +++ node_stack = kmalloc_array(trie->max_prefixlen, +++ sizeof(struct lpm_trie_node *), +++ GFP_ATOMIC | __GFP_NOWARN); +++ if (!node_stack) +++ return -ENOMEM; +++ +++ /* Try to find the exact node for the given key */ +++ for (node = search_root; node;) { +++ node_stack[++stack_ptr] = node; +++ matchlen = longest_prefix_match(trie, node, key); +++ if (node->prefixlen != matchlen || +++ node->prefixlen == key->prefixlen) +++ break; +++ +++ next_bit = extract_bit(key->data, node->prefixlen); +++ node = rcu_dereference(node->child[next_bit]); +++ } +++ if (!node || node->prefixlen != key->prefixlen || +++ (node->flags & LPM_TREE_NODE_FLAG_IM)) +++ goto find_leftmost; +++ +++ /* The node with the exactly-matching key has been found, +++ * find the first node in postorder after the matched node. +++ */ +++ node = node_stack[stack_ptr]; +++ while (stack_ptr > 0) { +++ parent = node_stack[stack_ptr - 1]; +++ if (rcu_dereference(parent->child[0]) == node) { +++ search_root = rcu_dereference(parent->child[1]); +++ if (search_root) +++ goto find_leftmost; +++ } +++ if (!(parent->flags & LPM_TREE_NODE_FLAG_IM)) { +++ next_node = parent; +++ goto do_copy; +++ } +++ +++ node = parent; +++ stack_ptr--; +++ } +++ +++ /* did not find anything */ +++ err = -ENOENT; +++ goto free_stack; +++ +++find_leftmost: +++ /* Find the leftmost non-intermediate node, all intermediate nodes +++ * have exact two children, so this function will never return NULL. 
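(Editor's sketch, not from the patch.) The postorder enumeration described above is what a userspace walk over the map observes: iterating with bpf_map_get_next_key() starting from a NULL key yields more specific prefixes before less specific ones. A minimal loop, reusing the hypothetical ipv4_lpm_key layout from the earlier sketch:

#include <stdio.h>
#include <linux/types.h>
#include <bpf/bpf.h>

struct ipv4_lpm_key {
	__u32 prefixlen;
	__u8 data[4];
};

void dump_trie(int fd)
{
	struct ipv4_lpm_key key, next;
	void *prev = NULL;

	/* For the 4-node example above this prints 192.168.0.0/24,
	 * 192.168.1.0/24, 192.168.128.0/24 and finally 192.168.0.0/16.
	 */
	while (!bpf_map_get_next_key(fd, prev, &next)) {
		printf("%d.%d.%d.%d/%u\n", next.data[0], next.data[1],
		       next.data[2], next.data[3], next.prefixlen);
		key = next;
		prev = &key;
	}
}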
+++ */ +++ for (node = search_root; node;) { +++ if (node->flags & LPM_TREE_NODE_FLAG_IM) { +++ node = rcu_dereference(node->child[0]); +++ } else { +++ next_node = node; +++ node = rcu_dereference(node->child[0]); +++ if (!node) +++ node = rcu_dereference(next_node->child[1]); +++ } +++ } +++do_copy: +++ next_key->prefixlen = next_node->prefixlen; +++ memcpy((void *)next_key + offsetof(struct bpf_lpm_trie_key, data), +++ next_node->data, trie->data_size); +++free_stack: +++ kfree(node_stack); +++ return err; +++} +++ +++static int trie_check_btf(const struct bpf_map *map, +++ const struct btf *btf, +++ const struct btf_type *key_type, +++ const struct btf_type *value_type) +++{ +++ /* Keys must have struct bpf_lpm_trie_key embedded. */ +++ return BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ? +++ -EINVAL : 0; +++} +++ +++const struct bpf_map_ops trie_map_ops = { +++ .map_alloc = trie_alloc, +++ .map_free = trie_free, +++ .map_get_next_key = trie_get_next_key, +++ .map_lookup_elem = trie_lookup_elem, +++ .map_update_elem = trie_update_elem, +++ .map_delete_elem = trie_delete_elem, +++ .map_check_btf = trie_check_btf, +++}; ++--- a/kernel/bpf/Makefile +++++ b/kernel/bpf/Makefile ++@@ -1,4 +1,23 @@ +++# SPDX-License-Identifier: GPL-2.0 ++ obj-y := core.o +++ifneq ($(CONFIG_BPF_JIT_ALWAYS_ON),y) +++# ___bpf_prog_run() needs GCSE disabled on x86; see 3193c0836f203 for details +++cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse +++endif +++CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) ++ ++-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o ++-obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o +++obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o +++obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o +++obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o +++obj-$(CONFIG_BPF_SYSCALL) += disasm.o +++obj-$(CONFIG_BPF_SYSCALL) += btf.o +++ifeq ($(CONFIG_NET),y) +++obj-$(CONFIG_BPF_SYSCALL) += devmap.o +++endif +++ifeq ($(CONFIG_PERF_EVENTS),y) +++obj-$(CONFIG_BPF_SYSCALL) += stackmap.o +++endif +++obj-$(CONFIG_CGROUP_BPF) += cgroup.o +++ifeq ($(CONFIG_SYSFS),y) +++obj-$(CONFIG_DEBUG_INFO_BTF) += sysfs_btf.o +++endif ++--- /dev/null +++++ b/kernel/bpf/map_in_map.c ++@@ -0,0 +1,120 @@ +++// SPDX-License-Identifier: GPL-2.0-only +++/* Copyright (c) 2017 Facebook +++ */ +++#include +++#include +++ +++#include "map_in_map.h" +++ +++struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) +++{ +++ struct bpf_map *inner_map, *inner_map_meta; +++ u32 inner_map_meta_size; +++ struct fd f; +++ +++ f = fdget(inner_map_ufd); +++ inner_map = __bpf_map_get(f); +++ if (IS_ERR(inner_map)) +++ return inner_map; +++ +++ /* prog_array->owner_prog_type and owner_jited +++ * is a runtime binding. Doing static check alone +++ * in the verifier is not enough. +++ */ +++ if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY || +++ inner_map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE || +++ inner_map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { +++ fdput(f); +++ return ERR_PTR(-ENOTSUPP); +++ } +++ +++ /* Does not support >1 level map-in-map */ +++ if (inner_map->inner_map_meta) { +++ fdput(f); +++ return ERR_PTR(-EINVAL); +++ } +++ +++ if (map_value_has_spin_lock(inner_map)) { +++ fdput(f); +++ return ERR_PTR(-ENOTSUPP); +++ } +++ +++ inner_map_meta_size = sizeof(*inner_map_meta); +++ /* In some cases verifier needs to access beyond just base map. 
*/ +++ if (inner_map->ops == &array_map_ops) +++ inner_map_meta_size = sizeof(struct bpf_array); +++ +++ inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER); +++ if (!inner_map_meta) { +++ fdput(f); +++ return ERR_PTR(-ENOMEM); +++ } +++ +++ inner_map_meta->map_type = inner_map->map_type; +++ inner_map_meta->key_size = inner_map->key_size; +++ inner_map_meta->value_size = inner_map->value_size; +++ inner_map_meta->map_flags = inner_map->map_flags; +++ inner_map_meta->max_entries = inner_map->max_entries; +++ inner_map_meta->spin_lock_off = inner_map->spin_lock_off; +++ +++ /* Misc members not needed in bpf_map_meta_equal() check. */ +++ inner_map_meta->ops = inner_map->ops; +++ if (inner_map->ops == &array_map_ops) { +++ inner_map_meta->unpriv_array = inner_map->unpriv_array; +++ container_of(inner_map_meta, struct bpf_array, map)->index_mask = +++ container_of(inner_map, struct bpf_array, map)->index_mask; +++ } +++ +++ fdput(f); +++ return inner_map_meta; +++} +++ +++void bpf_map_meta_free(struct bpf_map *map_meta) +++{ +++ kfree(map_meta); +++} +++ +++bool bpf_map_meta_equal(const struct bpf_map *meta0, +++ const struct bpf_map *meta1) +++{ +++ /* No need to compare ops because it is covered by map_type */ +++ return meta0->map_type == meta1->map_type && +++ meta0->key_size == meta1->key_size && +++ meta0->value_size == meta1->value_size && +++ meta0->map_flags == meta1->map_flags && +++ meta0->max_entries == meta1->max_entries; +++} +++ +++void *bpf_map_fd_get_ptr(struct bpf_map *map, +++ struct file *map_file /* not used */, +++ int ufd) +++{ +++ struct bpf_map *inner_map; +++ struct fd f; +++ +++ f = fdget(ufd); +++ inner_map = __bpf_map_get(f); +++ if (IS_ERR(inner_map)) +++ return inner_map; +++ +++ if (bpf_map_meta_equal(map->inner_map_meta, inner_map)) +++ inner_map = bpf_map_inc(inner_map, false); +++ else +++ inner_map = ERR_PTR(-EINVAL); +++ +++ fdput(f); +++ return inner_map; +++} +++ +++void bpf_map_fd_put_ptr(void *ptr) +++{ +++ /* ptr->ops->map_free() has to go through one +++ * rcu grace period by itself. 
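(Editor's sketch, not from the patch.) The contract enforced by bpf_map_meta_alloc()/bpf_map_meta_equal() above is driven from userspace roughly as follows: the outer map is created against a template inner-map fd, and any fd later stored as a value must belong to a map with identical type, key/value sizes, flags and max_entries. A sketch using libbpf's pre-1.0 wrappers:

#include <unistd.h>
#include <bpf/bpf.h>

int create_outer_map(void)
{
	int inner_fd, outer_fd;

	/* Template inner map: its attributes become the metadata that
	 * bpf_map_meta_equal() checks against every future inner map fd.
	 */
	inner_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(__u32),
				  sizeof(__u64), 64, 0);
	if (inner_fd < 0)
		return -1;

	outer_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, "outer",
					 sizeof(__u32), inner_fd, 8, 0);

	/* Populating a slot later looks like:
	 *   __u32 idx = 0;  int fd2 = <fd of a compatible array map>;
	 *   bpf_map_update_elem(outer_fd, &idx, &fd2, BPF_ANY);
	 */
	close(inner_fd);	/* the template was only needed for metadata */
	return outer_fd;
}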
+++ */ +++ bpf_map_put(ptr); +++} +++ +++u32 bpf_map_fd_sys_lookup_elem(void *ptr) +++{ +++ return ((struct bpf_map *)ptr)->id; +++} ++--- /dev/null +++++ b/kernel/bpf/map_in_map.h ++@@ -0,0 +1,21 @@ +++/* SPDX-License-Identifier: GPL-2.0-only */ +++/* Copyright (c) 2017 Facebook +++ */ +++#ifndef __MAP_IN_MAP_H__ +++#define __MAP_IN_MAP_H__ +++ +++#include +++ +++struct file; +++struct bpf_map; +++ +++struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd); +++void bpf_map_meta_free(struct bpf_map *map_meta); +++bool bpf_map_meta_equal(const struct bpf_map *meta0, +++ const struct bpf_map *meta1); +++void *bpf_map_fd_get_ptr(struct bpf_map *map, struct file *map_file, +++ int ufd); +++void bpf_map_fd_put_ptr(void *ptr); +++u32 bpf_map_fd_sys_lookup_elem(void *ptr); +++ +++#endif ++--- /dev/null +++++ b/kernel/bpf/percpu_freelist.c ++@@ -0,0 +1,118 @@ +++// SPDX-License-Identifier: GPL-2.0-only +++/* Copyright (c) 2016 Facebook +++ */ +++#include "percpu_freelist.h" +++ +++int pcpu_freelist_init(struct pcpu_freelist *s) +++{ +++ int cpu; +++ +++ s->freelist = alloc_percpu(struct pcpu_freelist_head); +++ if (!s->freelist) +++ return -ENOMEM; +++ +++ for_each_possible_cpu(cpu) { +++ struct pcpu_freelist_head *head = per_cpu_ptr(s->freelist, cpu); +++ +++ raw_spin_lock_init(&head->lock); +++ head->first = NULL; +++ } +++ return 0; +++} +++ +++void pcpu_freelist_destroy(struct pcpu_freelist *s) +++{ +++ free_percpu(s->freelist); +++} +++ +++static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head, +++ struct pcpu_freelist_node *node) +++{ +++ raw_spin_lock(&head->lock); +++ node->next = head->first; +++ head->first = node; +++ raw_spin_unlock(&head->lock); +++} +++ +++void __pcpu_freelist_push(struct pcpu_freelist *s, +++ struct pcpu_freelist_node *node) +++{ +++ struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist); +++ +++ ___pcpu_freelist_push(head, node); +++} +++ +++void pcpu_freelist_push(struct pcpu_freelist *s, +++ struct pcpu_freelist_node *node) +++{ +++ unsigned long flags; +++ +++ local_irq_save(flags); +++ __pcpu_freelist_push(s, node); +++ local_irq_restore(flags); +++} +++ +++void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, +++ u32 nr_elems) +++{ +++ struct pcpu_freelist_head *head; +++ unsigned long flags; +++ int i, cpu, pcpu_entries; +++ +++ pcpu_entries = nr_elems / num_possible_cpus() + 1; +++ i = 0; +++ +++ /* disable irq to workaround lockdep false positive +++ * in bpf usage pcpu_freelist_populate() will never race +++ * with pcpu_freelist_push() +++ */ +++ local_irq_save(flags); +++ for_each_possible_cpu(cpu) { +++again: +++ head = per_cpu_ptr(s->freelist, cpu); +++ ___pcpu_freelist_push(head, buf); +++ i++; +++ buf += elem_size; +++ if (i == nr_elems) +++ break; +++ if (i % pcpu_entries) +++ goto again; +++ } +++ local_irq_restore(flags); +++} +++ +++struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s) +++{ +++ struct pcpu_freelist_head *head; +++ struct pcpu_freelist_node *node; +++ int orig_cpu, cpu; +++ +++ orig_cpu = cpu = raw_smp_processor_id(); +++ while (1) { +++ head = per_cpu_ptr(s->freelist, cpu); +++ raw_spin_lock(&head->lock); +++ node = head->first; +++ if (node) { +++ head->first = node->next; +++ raw_spin_unlock(&head->lock); +++ return node; +++ } +++ raw_spin_unlock(&head->lock); +++ cpu = cpumask_next(cpu, cpu_possible_mask); +++ if (cpu >= nr_cpu_ids) +++ cpu = 0; +++ if (cpu == orig_cpu) +++ return NULL; +++ } +++} +++ +++struct pcpu_freelist_node *pcpu_freelist_pop(struct 
pcpu_freelist *s) +++{ +++ struct pcpu_freelist_node *ret; +++ unsigned long flags; +++ +++ local_irq_save(flags); +++ ret = __pcpu_freelist_pop(s); +++ local_irq_restore(flags); +++ return ret; +++} ++--- /dev/null +++++ b/kernel/bpf/percpu_freelist.h ++@@ -0,0 +1,32 @@ +++/* SPDX-License-Identifier: GPL-2.0-only */ +++/* Copyright (c) 2016 Facebook +++ */ +++#ifndef __PERCPU_FREELIST_H__ +++#define __PERCPU_FREELIST_H__ +++#include +++#include +++ +++struct pcpu_freelist_head { +++ struct pcpu_freelist_node *first; +++ raw_spinlock_t lock; +++}; +++ +++struct pcpu_freelist { +++ struct pcpu_freelist_head __percpu *freelist; +++}; +++ +++struct pcpu_freelist_node { +++ struct pcpu_freelist_node *next; +++}; +++ +++/* pcpu_freelist_* do spin_lock_irqsave. */ +++void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *); +++struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *); +++/* __pcpu_freelist_* do spin_lock only. caller must disable irqs. */ +++void __pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *); +++struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *); +++void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, +++ u32 nr_elems); +++int pcpu_freelist_init(struct pcpu_freelist *); +++void pcpu_freelist_destroy(struct pcpu_freelist *s); +++#endif ++--- /dev/null +++++ b/kernel/bpf/queue_stack_maps.c ++@@ -0,0 +1,289 @@ +++// SPDX-License-Identifier: GPL-2.0 +++/* +++ * queue_stack_maps.c: BPF queue and stack maps +++ * +++ * Copyright (c) 2018 Politecnico di Torino +++ */ +++#include +++#include +++#include +++#include +++#include "percpu_freelist.h" +++ +++#define QUEUE_STACK_CREATE_FLAG_MASK \ +++ (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK) +++ +++struct bpf_queue_stack { +++ struct bpf_map map; +++ raw_spinlock_t lock; +++ u32 head, tail; +++ u32 size; /* max_entries + 1 */ +++ +++ char elements[0] __aligned(8); +++}; +++ +++static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map) +++{ +++ return container_of(map, struct bpf_queue_stack, map); +++} +++ +++static bool queue_stack_map_is_empty(struct bpf_queue_stack *qs) +++{ +++ return qs->head == qs->tail; +++} +++ +++static bool queue_stack_map_is_full(struct bpf_queue_stack *qs) +++{ +++ u32 head = qs->head + 1; +++ +++ if (unlikely(head >= qs->size)) +++ head = 0; +++ +++ return head == qs->tail; +++} +++ +++/* Called from syscall */ +++static int queue_stack_map_alloc_check(union bpf_attr *attr) +++{ +++ if (!capable(CAP_SYS_ADMIN)) +++ return -EPERM; +++ +++ /* check sanity of attributes */ +++ if (attr->max_entries == 0 || attr->key_size != 0 || +++ attr->value_size == 0 || +++ attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK || +++ !bpf_map_flags_access_ok(attr->map_flags)) +++ return -EINVAL; +++ +++ if (attr->value_size > KMALLOC_MAX_SIZE) +++ /* if value_size is bigger, the user space won't be able to +++ * access the elements. 
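(Editor's sketch, not from the patch.) From userspace the queue/stack maps validated above are driven entirely through NULL keys: update pushes, lookup peeks and lookup-and-delete pops, matching the push/peek/pop ops wired up at the end of this file. A minimal queue example, assuming libbpf's legacy wrappers and CAP_SYS_ADMIN:

#include <stdio.h>
#include <bpf/bpf.h>

int demo_queue(void)
{
	__u64 v = 42, out = 0;
	int fd;

	/* key_size must be 0 for queue/stack maps (see the check above). */
	fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 0, sizeof(v), 16, 0);
	if (fd < 0)
		return -1;

	/* Push; passing BPF_EXIST instead would overwrite the oldest
	 * element once all max_entries slots of the ring are in use.
	 */
	bpf_map_update_elem(fd, NULL, &v, 0);

	/* Peek leaves the element in place, pop removes it. */
	bpf_map_lookup_elem(fd, NULL, &out);
	bpf_map_lookup_and_delete_elem(fd, NULL, &out);

	printf("popped %llu\n", (unsigned long long)out);
	return 0;
}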
+++ */ +++ return -E2BIG; +++ +++ return 0; +++} +++ +++static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr) +++{ +++ int ret, numa_node = bpf_map_attr_numa_node(attr); +++ struct bpf_map_memory mem = {0}; +++ struct bpf_queue_stack *qs; +++ u64 size, queue_size, cost; +++ +++ size = (u64) attr->max_entries + 1; +++ cost = queue_size = sizeof(*qs) + size * attr->value_size; +++ +++ ret = bpf_map_charge_init(&mem, cost); +++ if (ret < 0) +++ return ERR_PTR(ret); +++ +++ qs = bpf_map_area_alloc(queue_size, numa_node); +++ if (!qs) { +++ bpf_map_charge_finish(&mem); +++ return ERR_PTR(-ENOMEM); +++ } +++ +++ memset(qs, 0, sizeof(*qs)); +++ +++ bpf_map_init_from_attr(&qs->map, attr); +++ +++ bpf_map_charge_move(&qs->map.memory, &mem); +++ qs->size = size; +++ +++ raw_spin_lock_init(&qs->lock); +++ +++ return &qs->map; +++} +++ +++/* Called when map->refcnt goes to zero, either from workqueue or from syscall */ +++static void queue_stack_map_free(struct bpf_map *map) +++{ +++ struct bpf_queue_stack *qs = bpf_queue_stack(map); +++ +++ /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, +++ * so the programs (can be more than one that used this map) were +++ * disconnected from events. Wait for outstanding critical sections in +++ * these programs to complete +++ */ +++ synchronize_rcu(); +++ +++ bpf_map_area_free(qs); +++} +++ +++static int __queue_map_get(struct bpf_map *map, void *value, bool delete) +++{ +++ struct bpf_queue_stack *qs = bpf_queue_stack(map); +++ unsigned long flags; +++ int err = 0; +++ void *ptr; +++ +++ raw_spin_lock_irqsave(&qs->lock, flags); +++ +++ if (queue_stack_map_is_empty(qs)) { +++ memset(value, 0, qs->map.value_size); +++ err = -ENOENT; +++ goto out; +++ } +++ +++ ptr = &qs->elements[qs->tail * qs->map.value_size]; +++ memcpy(value, ptr, qs->map.value_size); +++ +++ if (delete) { +++ if (unlikely(++qs->tail >= qs->size)) +++ qs->tail = 0; +++ } +++ +++out: +++ raw_spin_unlock_irqrestore(&qs->lock, flags); +++ return err; +++} +++ +++ +++static int __stack_map_get(struct bpf_map *map, void *value, bool delete) +++{ +++ struct bpf_queue_stack *qs = bpf_queue_stack(map); +++ unsigned long flags; +++ int err = 0; +++ void *ptr; +++ u32 index; +++ +++ raw_spin_lock_irqsave(&qs->lock, flags); +++ +++ if (queue_stack_map_is_empty(qs)) { +++ memset(value, 0, qs->map.value_size); +++ err = -ENOENT; +++ goto out; +++ } +++ +++ index = qs->head - 1; +++ if (unlikely(index >= qs->size)) +++ index = qs->size - 1; +++ +++ ptr = &qs->elements[index * qs->map.value_size]; +++ memcpy(value, ptr, qs->map.value_size); +++ +++ if (delete) +++ qs->head = index; +++ +++out: +++ raw_spin_unlock_irqrestore(&qs->lock, flags); +++ return err; +++} +++ +++/* Called from syscall or from eBPF program */ +++static int queue_map_peek_elem(struct bpf_map *map, void *value) +++{ +++ return __queue_map_get(map, value, false); +++} +++ +++/* Called from syscall or from eBPF program */ +++static int stack_map_peek_elem(struct bpf_map *map, void *value) +++{ +++ return __stack_map_get(map, value, false); +++} +++ +++/* Called from syscall or from eBPF program */ +++static int queue_map_pop_elem(struct bpf_map *map, void *value) +++{ +++ return __queue_map_get(map, value, true); +++} +++ +++/* Called from syscall or from eBPF program */ +++static int stack_map_pop_elem(struct bpf_map *map, void *value) +++{ +++ return __stack_map_get(map, value, true); +++} +++ +++/* Called from syscall or from eBPF program */ +++static int queue_stack_map_push_elem(struct bpf_map 
*map, void *value, +++ u64 flags) +++{ +++ struct bpf_queue_stack *qs = bpf_queue_stack(map); +++ unsigned long irq_flags; +++ int err = 0; +++ void *dst; +++ +++ /* BPF_EXIST is used to force making room for a new element in case the +++ * map is full +++ */ +++ bool replace = (flags & BPF_EXIST); +++ +++ /* Check supported flags for queue and stack maps */ +++ if (flags & BPF_NOEXIST || flags > BPF_EXIST) +++ return -EINVAL; +++ +++ raw_spin_lock_irqsave(&qs->lock, irq_flags); +++ +++ if (queue_stack_map_is_full(qs)) { +++ if (!replace) { +++ err = -E2BIG; +++ goto out; +++ } +++ /* advance tail pointer to overwrite oldest element */ +++ if (unlikely(++qs->tail >= qs->size)) +++ qs->tail = 0; +++ } +++ +++ dst = &qs->elements[qs->head * qs->map.value_size]; +++ memcpy(dst, value, qs->map.value_size); +++ +++ if (unlikely(++qs->head >= qs->size)) +++ qs->head = 0; +++ +++out: +++ raw_spin_unlock_irqrestore(&qs->lock, irq_flags); +++ return err; +++} +++ +++/* Called from syscall or from eBPF program */ +++static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key) +++{ +++ return NULL; +++} +++ +++/* Called from syscall or from eBPF program */ +++static int queue_stack_map_update_elem(struct bpf_map *map, void *key, +++ void *value, u64 flags) +++{ +++ return -EINVAL; +++} +++ +++/* Called from syscall or from eBPF program */ +++static int queue_stack_map_delete_elem(struct bpf_map *map, void *key) +++{ +++ return -EINVAL; +++} +++ +++/* Called from syscall */ +++static int queue_stack_map_get_next_key(struct bpf_map *map, void *key, +++ void *next_key) +++{ +++ return -EINVAL; +++} +++ +++const struct bpf_map_ops queue_map_ops = { +++ .map_alloc_check = queue_stack_map_alloc_check, +++ .map_alloc = queue_stack_map_alloc, +++ .map_free = queue_stack_map_free, +++ .map_lookup_elem = queue_stack_map_lookup_elem, +++ .map_update_elem = queue_stack_map_update_elem, +++ .map_delete_elem = queue_stack_map_delete_elem, +++ .map_push_elem = queue_stack_map_push_elem, +++ .map_pop_elem = queue_map_pop_elem, +++ .map_peek_elem = queue_map_peek_elem, +++ .map_get_next_key = queue_stack_map_get_next_key, +++}; +++ +++const struct bpf_map_ops stack_map_ops = { +++ .map_alloc_check = queue_stack_map_alloc_check, +++ .map_alloc = queue_stack_map_alloc, +++ .map_free = queue_stack_map_free, +++ .map_lookup_elem = queue_stack_map_lookup_elem, +++ .map_update_elem = queue_stack_map_update_elem, +++ .map_delete_elem = queue_stack_map_delete_elem, +++ .map_push_elem = queue_stack_map_push_elem, +++ .map_pop_elem = stack_map_pop_elem, +++ .map_peek_elem = stack_map_peek_elem, +++ .map_get_next_key = queue_stack_map_get_next_key, +++}; ++--- /dev/null +++++ b/kernel/bpf/stackmap.c ++@@ -0,0 +1,634 @@ +++// SPDX-License-Identifier: GPL-2.0-only +++/* Copyright (c) 2016 Facebook +++ */ +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include "percpu_freelist.h" +++ +++#define STACK_CREATE_FLAG_MASK \ +++ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY | \ +++ BPF_F_STACK_BUILD_ID) +++ +++struct stack_map_bucket { +++ struct pcpu_freelist_node fnode; +++ u32 hash; +++ u32 nr; +++ u64 data[]; +++}; +++ +++struct bpf_stack_map { +++ struct bpf_map map; +++ void *elems; +++ struct pcpu_freelist freelist; +++ u32 n_buckets; +++ struct stack_map_bucket *buckets[]; +++}; +++ +++/* irq_work to run up_read() for build_id lookup in nmi context */ +++struct stack_map_irq_work { +++ struct irq_work irq_work; +++ struct rw_semaphore *sem; +++}; +++ +++static void 
do_up_read(struct irq_work *entry) +++{ +++ struct stack_map_irq_work *work; +++ +++ work = container_of(entry, struct stack_map_irq_work, irq_work); +++ up_read_non_owner(work->sem); +++ work->sem = NULL; +++} +++ +++static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work); +++ +++static inline bool stack_map_use_build_id(struct bpf_map *map) +++{ +++ return (map->map_flags & BPF_F_STACK_BUILD_ID); +++} +++ +++static inline int stack_map_data_size(struct bpf_map *map) +++{ +++ return stack_map_use_build_id(map) ? +++ sizeof(struct bpf_stack_build_id) : sizeof(u64); +++} +++ +++static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) +++{ +++ u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size; +++ int err; +++ +++ smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries, +++ smap->map.numa_node); +++ if (!smap->elems) +++ return -ENOMEM; +++ +++ err = pcpu_freelist_init(&smap->freelist); +++ if (err) +++ goto free_elems; +++ +++ pcpu_freelist_populate(&smap->freelist, smap->elems, elem_size, +++ smap->map.max_entries); +++ return 0; +++ +++free_elems: +++ bpf_map_area_free(smap->elems); +++ return err; +++} +++ +++/* Called from syscall */ +++static struct bpf_map *stack_map_alloc(union bpf_attr *attr) +++{ +++ u32 value_size = attr->value_size; +++ struct bpf_stack_map *smap; +++ struct bpf_map_memory mem; +++ u64 cost, n_buckets; +++ int err; +++ +++ if (!capable(CAP_SYS_ADMIN)) +++ return ERR_PTR(-EPERM); +++ +++ if (attr->map_flags & ~STACK_CREATE_FLAG_MASK) +++ return ERR_PTR(-EINVAL); +++ +++ /* check sanity of attributes */ +++ if (attr->max_entries == 0 || attr->key_size != 4 || +++ value_size < 8 || value_size % 8) +++ return ERR_PTR(-EINVAL); +++ +++ BUILD_BUG_ON(sizeof(struct bpf_stack_build_id) % sizeof(u64)); +++ if (attr->map_flags & BPF_F_STACK_BUILD_ID) { +++ if (value_size % sizeof(struct bpf_stack_build_id) || +++ value_size / sizeof(struct bpf_stack_build_id) +++ > sysctl_perf_event_max_stack) +++ return ERR_PTR(-EINVAL); +++ } else if (value_size / 8 > sysctl_perf_event_max_stack) +++ return ERR_PTR(-EINVAL); +++ +++ /* hash table size must be power of 2 */ +++ n_buckets = roundup_pow_of_two(attr->max_entries); +++ if (!n_buckets) +++ return ERR_PTR(-E2BIG); +++ +++ cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); +++ cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); +++ err = bpf_map_charge_init(&mem, cost); +++ if (err) +++ return ERR_PTR(err); +++ +++ smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr)); +++ if (!smap) { +++ bpf_map_charge_finish(&mem); +++ return ERR_PTR(-ENOMEM); +++ } +++ +++ bpf_map_init_from_attr(&smap->map, attr); +++ smap->map.value_size = value_size; +++ smap->n_buckets = n_buckets; +++ +++ err = get_callchain_buffers(sysctl_perf_event_max_stack); +++ if (err) +++ goto free_charge; +++ +++ err = prealloc_elems_and_freelist(smap); +++ if (err) +++ goto put_buffers; +++ +++ bpf_map_charge_move(&smap->map.memory, &mem); +++ +++ return &smap->map; +++ +++put_buffers: +++ put_callchain_buffers(); +++free_charge: +++ bpf_map_charge_finish(&mem); +++ bpf_map_area_free(smap); +++ return ERR_PTR(err); +++} +++ +++#define BPF_BUILD_ID 3 +++/* +++ * Parse build id from the note segment. This logic can be shared between +++ * 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are +++ * identical. 
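(Editor's illustration, not from the patch.) The note segment walked by stack_map_parse_build_id() below is a sequence of Elf32_Nhdr headers, each followed by a 4-byte-aligned name and descriptor; for the GNU build-id note the name is "GNU" and n_type is NT_GNU_BUILD_ID (3, i.e. BPF_BUILD_ID above). The offsets the parser computes can be reproduced with plain ELF headers:

#include <elf.h>
#include <stdio.h>

/* Offsets inside one note entry, mirroring the pointer arithmetic in
 * stack_map_parse_build_id(). build_id_len is typically 20 (SHA-1).
 */
void dump_note_layout(unsigned int build_id_len)
{
	size_t name_off = sizeof(Elf32_Nhdr);				/* 12 */
	size_t desc_off = name_off + ((sizeof("GNU") + 3) & ~3UL);	/* 16 */
	size_t next_off = desc_off + ((build_id_len + 3) & ~3UL);

	printf("name \"GNU\" at +%zu, build id at +%zu, next note at +%zu\n",
	       name_off, desc_off, next_off);
}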
+++ */ +++static inline int stack_map_parse_build_id(void *page_addr, +++ unsigned char *build_id, +++ void *note_start, +++ Elf32_Word note_size) +++{ +++ Elf32_Word note_offs = 0, new_offs; +++ +++ /* check for overflow */ +++ if (note_start < page_addr || note_start + note_size < note_start) +++ return -EINVAL; +++ +++ /* only supports note that fits in the first page */ +++ if (note_start + note_size > page_addr + PAGE_SIZE) +++ return -EINVAL; +++ +++ while (note_offs + sizeof(Elf32_Nhdr) < note_size) { +++ Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs); +++ +++ if (nhdr->n_type == BPF_BUILD_ID && +++ nhdr->n_namesz == sizeof("GNU") && +++ nhdr->n_descsz > 0 && +++ nhdr->n_descsz <= BPF_BUILD_ID_SIZE) { +++ memcpy(build_id, +++ note_start + note_offs + +++ ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), +++ nhdr->n_descsz); +++ memset(build_id + nhdr->n_descsz, 0, +++ BPF_BUILD_ID_SIZE - nhdr->n_descsz); +++ return 0; +++ } +++ new_offs = note_offs + sizeof(Elf32_Nhdr) + +++ ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4); +++ if (new_offs <= note_offs) /* overflow */ +++ break; +++ note_offs = new_offs; +++ } +++ return -EINVAL; +++} +++ +++/* Parse build ID from 32-bit ELF */ +++static int stack_map_get_build_id_32(void *page_addr, +++ unsigned char *build_id) +++{ +++ Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr; +++ Elf32_Phdr *phdr; +++ int i; +++ +++ /* only supports phdr that fits in one page */ +++ if (ehdr->e_phnum > +++ (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr)) +++ return -EINVAL; +++ +++ phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr)); +++ +++ for (i = 0; i < ehdr->e_phnum; ++i) +++ if (phdr[i].p_type == PT_NOTE) +++ return stack_map_parse_build_id(page_addr, build_id, +++ page_addr + phdr[i].p_offset, +++ phdr[i].p_filesz); +++ return -EINVAL; +++} +++ +++/* Parse build ID from 64-bit ELF */ +++static int stack_map_get_build_id_64(void *page_addr, +++ unsigned char *build_id) +++{ +++ Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr; +++ Elf64_Phdr *phdr; +++ int i; +++ +++ /* only supports phdr that fits in one page */ +++ if (ehdr->e_phnum > +++ (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr)) +++ return -EINVAL; +++ +++ phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr)); +++ +++ for (i = 0; i < ehdr->e_phnum; ++i) +++ if (phdr[i].p_type == PT_NOTE) +++ return stack_map_parse_build_id(page_addr, build_id, +++ page_addr + phdr[i].p_offset, +++ phdr[i].p_filesz); +++ return -EINVAL; +++} +++ +++/* Parse build ID of ELF file mapped to vma */ +++static int stack_map_get_build_id(struct vm_area_struct *vma, +++ unsigned char *build_id) +++{ +++ Elf32_Ehdr *ehdr; +++ struct page *page; +++ void *page_addr; +++ int ret; +++ +++ /* only works for page backed storage */ +++ if (!vma->vm_file) +++ return -EINVAL; +++ +++ page = find_get_page(vma->vm_file->f_mapping, 0); +++ if (!page) +++ return -EFAULT; /* page not mapped */ +++ +++ ret = -EINVAL; +++ page_addr = kmap_atomic(page); +++ ehdr = (Elf32_Ehdr *)page_addr; +++ +++ /* compare magic x7f "ELF" */ +++ if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0) +++ goto out; +++ +++ /* only support executable file and shared object file */ +++ if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) +++ goto out; +++ +++ if (ehdr->e_ident[EI_CLASS] == ELFCLASS32) +++ ret = stack_map_get_build_id_32(page_addr, build_id); +++ else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) +++ ret = stack_map_get_build_id_64(page_addr, build_id); +++out: +++ kunmap_atomic(page_addr); +++ put_page(page); +++ return ret; 
+++} +++ +++static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, +++ u64 *ips, u32 trace_nr, bool user) +++{ +++ int i; +++ struct vm_area_struct *vma; +++ bool irq_work_busy = false; +++ struct stack_map_irq_work *work = NULL; +++ +++ if (irqs_disabled()) { +++ work = this_cpu_ptr(&up_read_work); +++ if (work->irq_work.flags & IRQ_WORK_BUSY) +++ /* cannot queue more up_read, fallback */ +++ irq_work_busy = true; +++ } +++ +++ /* +++ * We cannot do up_read() when the irq is disabled, because of +++ * risk to deadlock with rq_lock. To do build_id lookup when the +++ * irqs are disabled, we need to run up_read() in irq_work. We use +++ * a percpu variable to do the irq_work. If the irq_work is +++ * already used by another lookup, we fall back to report ips. +++ * +++ * Same fallback is used for kernel stack (!user) on a stackmap +++ * with build_id. +++ */ +++ if (!user || !current || !current->mm || irq_work_busy || +++ down_read_trylock(¤t->mm->mmap_sem) == 0) { +++ /* cannot access current->mm, fall back to ips */ +++ for (i = 0; i < trace_nr; i++) { +++ id_offs[i].status = BPF_STACK_BUILD_ID_IP; +++ id_offs[i].ip = ips[i]; +++ memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE); +++ } +++ return; +++ } +++ +++ for (i = 0; i < trace_nr; i++) { +++ vma = find_vma(current->mm, ips[i]); +++ if (!vma || stack_map_get_build_id(vma, id_offs[i].build_id)) { +++ /* per entry fall back to ips */ +++ id_offs[i].status = BPF_STACK_BUILD_ID_IP; +++ id_offs[i].ip = ips[i]; +++ memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE); +++ continue; +++ } +++ id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i] +++ - vma->vm_start; +++ id_offs[i].status = BPF_STACK_BUILD_ID_VALID; +++ } +++ +++ if (!work) { +++ up_read(¤t->mm->mmap_sem); +++ } else { +++ work->sem = ¤t->mm->mmap_sem; +++ irq_work_queue(&work->irq_work); +++ /* +++ * The irq_work will release the mmap_sem with +++ * up_read_non_owner(). The rwsem_release() is called +++ * here to release the lock from lockdep's perspective. 
+++ */ +++ rwsem_release(¤t->mm->mmap_sem.dep_map, 1, _RET_IP_); +++ } +++} +++ +++BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, +++ u64, flags) +++{ +++ struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); +++ struct perf_callchain_entry *trace; +++ struct stack_map_bucket *bucket, *new_bucket, *old_bucket; +++ u32 max_depth = map->value_size / stack_map_data_size(map); +++ /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */ +++ u32 init_nr = sysctl_perf_event_max_stack - max_depth; +++ u32 skip = flags & BPF_F_SKIP_FIELD_MASK; +++ u32 hash, id, trace_nr, trace_len; +++ bool user = flags & BPF_F_USER_STACK; +++ bool kernel = !user; +++ u64 *ips; +++ bool hash_matches; +++ +++ if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | +++ BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID))) +++ return -EINVAL; +++ +++ trace = get_perf_callchain(regs, init_nr, kernel, user, +++ sysctl_perf_event_max_stack, false, false); +++ +++ if (unlikely(!trace)) +++ /* couldn't fetch the stack trace */ +++ return -EFAULT; +++ +++ /* get_perf_callchain() guarantees that trace->nr >= init_nr +++ * and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth +++ */ +++ trace_nr = trace->nr - init_nr; +++ +++ if (trace_nr <= skip) +++ /* skipping more than usable stack trace */ +++ return -EFAULT; +++ +++ trace_nr -= skip; +++ trace_len = trace_nr * sizeof(u64); +++ ips = trace->ip + skip + init_nr; +++ hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0); +++ id = hash & (smap->n_buckets - 1); +++ bucket = READ_ONCE(smap->buckets[id]); +++ +++ hash_matches = bucket && bucket->hash == hash; +++ /* fast cmp */ +++ if (hash_matches && flags & BPF_F_FAST_STACK_CMP) +++ return id; +++ +++ if (stack_map_use_build_id(map)) { +++ /* for build_id+offset, pop a bucket before slow cmp */ +++ new_bucket = (struct stack_map_bucket *) +++ pcpu_freelist_pop(&smap->freelist); +++ if (unlikely(!new_bucket)) +++ return -ENOMEM; +++ new_bucket->nr = trace_nr; +++ stack_map_get_build_id_offset( +++ (struct bpf_stack_build_id *)new_bucket->data, +++ ips, trace_nr, user); +++ trace_len = trace_nr * sizeof(struct bpf_stack_build_id); +++ if (hash_matches && bucket->nr == trace_nr && +++ memcmp(bucket->data, new_bucket->data, trace_len) == 0) { +++ pcpu_freelist_push(&smap->freelist, &new_bucket->fnode); +++ return id; +++ } +++ if (bucket && !(flags & BPF_F_REUSE_STACKID)) { +++ pcpu_freelist_push(&smap->freelist, &new_bucket->fnode); +++ return -EEXIST; +++ } +++ } else { +++ if (hash_matches && bucket->nr == trace_nr && +++ memcmp(bucket->data, ips, trace_len) == 0) +++ return id; +++ if (bucket && !(flags & BPF_F_REUSE_STACKID)) +++ return -EEXIST; +++ +++ new_bucket = (struct stack_map_bucket *) +++ pcpu_freelist_pop(&smap->freelist); +++ if (unlikely(!new_bucket)) +++ return -ENOMEM; +++ memcpy(new_bucket->data, ips, trace_len); +++ } +++ +++ new_bucket->hash = hash; +++ new_bucket->nr = trace_nr; +++ +++ old_bucket = xchg(&smap->buckets[id], new_bucket); +++ if (old_bucket) +++ pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); +++ return id; +++} +++ +++const struct bpf_func_proto bpf_get_stackid_proto = { +++ .func = bpf_get_stackid, +++ .gpl_only = true, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_PTR_TO_CTX, +++ .arg2_type = ARG_CONST_MAP_PTR, +++ .arg3_type = ARG_ANYTHING, +++}; +++ +++BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size, +++ u64, flags) +++{ +++ u32 init_nr, trace_nr, copy_len, 
elem_size, num_elem; +++ bool user_build_id = flags & BPF_F_USER_BUILD_ID; +++ u32 skip = flags & BPF_F_SKIP_FIELD_MASK; +++ bool user = flags & BPF_F_USER_STACK; +++ struct perf_callchain_entry *trace; +++ bool kernel = !user; +++ int err = -EINVAL; +++ u64 *ips; +++ +++ if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | +++ BPF_F_USER_BUILD_ID))) +++ goto clear; +++ if (kernel && user_build_id) +++ goto clear; +++ +++ elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id) +++ : sizeof(u64); +++ if (unlikely(size % elem_size)) +++ goto clear; +++ +++ num_elem = size / elem_size; +++ if (sysctl_perf_event_max_stack < num_elem) +++ init_nr = 0; +++ else +++ init_nr = sysctl_perf_event_max_stack - num_elem; +++ trace = get_perf_callchain(regs, init_nr, kernel, user, +++ sysctl_perf_event_max_stack, false, false); +++ if (unlikely(!trace)) +++ goto err_fault; +++ +++ trace_nr = trace->nr - init_nr; +++ if (trace_nr < skip) +++ goto err_fault; +++ +++ trace_nr -= skip; +++ trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem; +++ copy_len = trace_nr * elem_size; +++ ips = trace->ip + skip + init_nr; +++ if (user && user_build_id) +++ stack_map_get_build_id_offset(buf, ips, trace_nr, user); +++ else +++ memcpy(buf, ips, copy_len); +++ +++ if (size > copy_len) +++ memset(buf + copy_len, 0, size - copy_len); +++ return copy_len; +++ +++err_fault: +++ err = -EFAULT; +++clear: +++ memset(buf, 0, size); +++ return err; +++} +++ +++const struct bpf_func_proto bpf_get_stack_proto = { +++ .func = bpf_get_stack, +++ .gpl_only = true, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_PTR_TO_CTX, +++ .arg2_type = ARG_PTR_TO_UNINIT_MEM, +++ .arg3_type = ARG_CONST_SIZE_OR_ZERO, +++ .arg4_type = ARG_ANYTHING, +++}; +++ +++/* Called from eBPF program */ +++static void *stack_map_lookup_elem(struct bpf_map *map, void *key) +++{ +++ return ERR_PTR(-EOPNOTSUPP); +++} +++ +++/* Called from syscall */ +++int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) +++{ +++ struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); +++ struct stack_map_bucket *bucket, *old_bucket; +++ u32 id = *(u32 *)key, trace_len; +++ +++ if (unlikely(id >= smap->n_buckets)) +++ return -ENOENT; +++ +++ bucket = xchg(&smap->buckets[id], NULL); +++ if (!bucket) +++ return -ENOENT; +++ +++ trace_len = bucket->nr * stack_map_data_size(map); +++ memcpy(value, bucket->data, trace_len); +++ memset(value + trace_len, 0, map->value_size - trace_len); +++ +++ old_bucket = xchg(&smap->buckets[id], bucket); +++ if (old_bucket) +++ pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); +++ return 0; +++} +++ +++static int stack_map_get_next_key(struct bpf_map *map, void *key, +++ void *next_key) +++{ +++ struct bpf_stack_map *smap = container_of(map, +++ struct bpf_stack_map, map); +++ u32 id; +++ +++ WARN_ON_ONCE(!rcu_read_lock_held()); +++ +++ if (!key) { +++ id = 0; +++ } else { +++ id = *(u32 *)key; +++ if (id >= smap->n_buckets || !smap->buckets[id]) +++ id = 0; +++ else +++ id++; +++ } +++ +++ while (id < smap->n_buckets && !smap->buckets[id]) +++ id++; +++ +++ if (id >= smap->n_buckets) +++ return -ENOENT; +++ +++ *(u32 *)next_key = id; +++ return 0; +++} +++ +++static int stack_map_update_elem(struct bpf_map *map, void *key, void *value, +++ u64 map_flags) +++{ +++ return -EINVAL; +++} +++ +++/* Called from syscall or from eBPF program */ +++static int stack_map_delete_elem(struct bpf_map *map, void *key) +++{ +++ struct bpf_stack_map *smap = container_of(map, struct 
bpf_stack_map, map); +++ struct stack_map_bucket *old_bucket; +++ u32 id = *(u32 *)key; +++ +++ if (unlikely(id >= smap->n_buckets)) +++ return -E2BIG; +++ +++ old_bucket = xchg(&smap->buckets[id], NULL); +++ if (old_bucket) { +++ pcpu_freelist_push(&smap->freelist, &old_bucket->fnode); +++ return 0; +++ } else { +++ return -ENOENT; +++ } +++} +++ +++/* Called when map->refcnt goes to zero, either from workqueue or from syscall */ +++static void stack_map_free(struct bpf_map *map) +++{ +++ struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); +++ +++ /* wait for bpf programs to complete before freeing stack map */ +++ synchronize_rcu(); +++ +++ bpf_map_area_free(smap->elems); +++ pcpu_freelist_destroy(&smap->freelist); +++ bpf_map_area_free(smap); +++ put_callchain_buffers(); +++} +++ +++const struct bpf_map_ops stack_trace_map_ops = { +++ .map_alloc = stack_map_alloc, +++ .map_free = stack_map_free, +++ .map_get_next_key = stack_map_get_next_key, +++ .map_lookup_elem = stack_map_lookup_elem, +++ .map_update_elem = stack_map_update_elem, +++ .map_delete_elem = stack_map_delete_elem, +++ .map_check_btf = map_check_no_btf, +++}; +++ +++static int __init stack_map_init(void) +++{ +++ int cpu; +++ struct stack_map_irq_work *work; +++ +++ for_each_possible_cpu(cpu) { +++ work = per_cpu_ptr(&up_read_work, cpu); +++ init_irq_work(&work->irq_work, do_up_read); +++ } +++ return 0; +++} +++subsys_initcall(stack_map_init); ++--- a/kernel/bpf/syscall.c +++++ b/kernel/bpf/syscall.c ++@@ -1,106 +1,333 @@ +++// SPDX-License-Identifier: GPL-2.0-only ++ /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com ++- * ++- * This program is free software; you can redistribute it and/or ++- * modify it under the terms of version 2 of the GNU General Public ++- * License as published by the Free Software Foundation. ++- * ++- * This program is distributed in the hope that it will be useful, but ++- * WITHOUT ANY WARRANTY; without even the implied warranty of ++- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++- * General Public License for more details. ++ */ ++ #include +++#include +++#include ++ #include ++ #include +++#include +++#include ++ #include +++#include ++ #include +++#include ++ #include ++ #include ++ #include +++#include +++#include +++#include +++#include +++#include +++ +++#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \ +++ (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ +++ (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \ +++ (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) +++#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) +++#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map)) +++ +++#define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY) +++ +++DEFINE_PER_CPU(int, bpf_prog_active); +++static DEFINE_IDR(prog_idr); +++static DEFINE_SPINLOCK(prog_idr_lock); +++static DEFINE_IDR(map_idr); +++static DEFINE_SPINLOCK(map_idr_lock); ++ ++ int sysctl_unprivileged_bpf_disabled __read_mostly; ++ ++-static LIST_HEAD(bpf_map_types); +++static const struct bpf_map_ops * const bpf_map_types[] = { +++#define BPF_PROG_TYPE(_id, _ops) +++#define BPF_MAP_TYPE(_id, _ops) \ +++ [_id] = &_ops, +++#include +++#undef BPF_PROG_TYPE +++#undef BPF_MAP_TYPE +++}; +++ +++/* +++ * If we're handed a bigger struct than we know of, ensure all the unknown bits +++ * are 0 - i.e. new user-space does not rely on any kernel feature extensions +++ * we don't know about yet. 
+++ * +++ * There is a ToCToU between this function call and the following +++ * copy_from_user() call. However, this is not a concern since this function is +++ * meant to be a future-proofing of bits. +++ */ +++int bpf_check_uarg_tail_zero(void __user *uaddr, +++ size_t expected_size, +++ size_t actual_size) +++{ +++ unsigned char __user *addr; +++ unsigned char __user *end; +++ unsigned char val; +++ int err; +++ +++ if (unlikely(actual_size > PAGE_SIZE)) /* silly large */ +++ return -E2BIG; +++ +++ if (unlikely(!access_ok(VERIFY_READ, uaddr, actual_size))) +++ return -EFAULT; +++ +++ if (actual_size <= expected_size) +++ return 0; +++ +++ addr = uaddr + expected_size; +++ end = uaddr + actual_size; +++ +++ for (; addr < end; addr++) { +++ err = get_user(val, addr); +++ if (err) +++ return err; +++ if (val) +++ return -E2BIG; +++ } +++ +++ return 0; +++} ++ ++ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) ++ { ++- struct bpf_map_type_list *tl; +++ const struct bpf_map_ops *ops; +++ u32 type = attr->map_type; ++ struct bpf_map *map; +++ int err; ++ ++- list_for_each_entry(tl, &bpf_map_types, list_node) { ++- if (tl->type == attr->map_type) { ++- map = tl->ops->map_alloc(attr); ++- if (IS_ERR(map)) ++- return map; ++- map->ops = tl->ops; ++- map->map_type = attr->map_type; ++- return map; ++- } +++ if (type >= ARRAY_SIZE(bpf_map_types)) +++ return ERR_PTR(-EINVAL); +++ ops = bpf_map_types[type]; +++ if (!ops) +++ return ERR_PTR(-EINVAL); +++ +++ if (ops->map_alloc_check) { +++ err = ops->map_alloc_check(attr); +++ if (err) +++ return ERR_PTR(err); +++ } +++ map = ops->map_alloc(attr); +++ if (IS_ERR(map)) +++ return map; +++ map->ops = ops; +++ map->map_type = type; +++ return map; +++} +++ +++void *bpf_map_area_alloc(u64 size, int numa_node) +++{ +++ /* We really just want to fail instead of triggering OOM killer +++ * under memory pressure, therefore we set __GFP_NORETRY to kmalloc, +++ * which is used for lower order allocation requests. +++ * +++ * It has been observed that higher order allocation requests done by +++ * vmalloc with __GFP_NORETRY being set might fail due to not trying +++ * to reclaim memory from the page cache, thus we set +++ * __GFP_RETRY_MAYFAIL to avoid such situations. +++ */ +++ +++ const gfp_t flags = __GFP_NOWARN | __GFP_ZERO; +++ void *area; +++ +++ if (size >= SIZE_MAX) +++ return NULL; +++ +++ if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { +++ area = kmalloc_node(size, GFP_USER | __GFP_NORETRY | flags, +++ numa_node); +++ if (area != NULL) +++ return area; ++ } ++- return ERR_PTR(-EINVAL); +++ +++ return __vmalloc_node_range(size, 1, +++ VMALLOC_START, VMALLOC_END, +++ GFP_KERNEL | flags, +++ PAGE_KERNEL, 0, numa_node, +++ __builtin_return_address(0)); ++ } ++ ++-/* boot time registration of different map implementations */ ++-void bpf_register_map_type(struct bpf_map_type_list *tl) +++void bpf_map_area_free(void *area) ++ { ++- list_add(&tl->list_node, &bpf_map_types); +++ kvfree(area); ++ } ++ ++-static int bpf_map_charge_memlock(struct bpf_map *map) +++static u32 bpf_map_flags_retain_permanent(u32 flags) ++ { ++- struct user_struct *user = get_current_user(); ++- unsigned long memlock_limit; +++ /* Some map creation flags are not tied to the map object but +++ * rather to the map fd instead, so they have no meaning upon +++ * map object inspection since multiple file descriptors with +++ * different (access) properties can exist here. Thus, given +++ * this has zero meaning for the map itself, lets clear these +++ * from here. 
+++ */ +++ return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY); +++} +++ +++void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) +++{ +++ map->map_type = attr->map_type; +++ map->key_size = attr->key_size; +++ map->value_size = attr->value_size; +++ map->max_entries = attr->max_entries; +++ map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags); +++ map->numa_node = bpf_map_attr_numa_node(attr); +++} +++ +++static int bpf_charge_memlock(struct user_struct *user, u32 pages) +++{ +++ unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; +++ +++ if (atomic_long_add_return(pages, &user->locked_vm) > memlock_limit) { +++ atomic_long_sub(pages, &user->locked_vm); +++ return -EPERM; +++ } +++ return 0; +++} +++ +++static void bpf_uncharge_memlock(struct user_struct *user, u32 pages) +++{ +++ if (user) +++ atomic_long_sub(pages, &user->locked_vm); +++} ++ ++- memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; +++int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size) +++{ +++ u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT; +++ struct user_struct *user; +++ int ret; ++ ++- atomic_long_add(map->pages, &user->locked_vm); +++ if (size >= U32_MAX - PAGE_SIZE) +++ return -E2BIG; ++ ++- if (atomic_long_read(&user->locked_vm) > memlock_limit) { ++- atomic_long_sub(map->pages, &user->locked_vm); +++ user = get_current_user(); +++ ret = bpf_charge_memlock(user, pages); +++ if (ret) { ++ free_uid(user); ++- return -EPERM; +++ return ret; ++ } ++- map->user = user; +++ +++ mem->pages = pages; +++ mem->user = user; +++ ++ return 0; ++ } ++ ++-static void bpf_map_uncharge_memlock(struct bpf_map *map) +++void bpf_map_charge_finish(struct bpf_map_memory *mem) ++ { ++- struct user_struct *user = map->user; +++ bpf_uncharge_memlock(mem->user, mem->pages); +++ free_uid(mem->user); +++} ++ ++- atomic_long_sub(map->pages, &user->locked_vm); ++- free_uid(user); +++void bpf_map_charge_move(struct bpf_map_memory *dst, +++ struct bpf_map_memory *src) +++{ +++ *dst = *src; +++ +++ /* Make sure src will not be used for the redundant uncharging. */ +++ memset(src, 0, sizeof(struct bpf_map_memory)); +++} +++ +++int bpf_map_charge_memlock(struct bpf_map *map, u32 pages) +++{ +++ int ret; +++ +++ ret = bpf_charge_memlock(map->memory.user, pages); +++ if (ret) +++ return ret; +++ map->memory.pages += pages; +++ return ret; +++} +++ +++void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages) +++{ +++ bpf_uncharge_memlock(map->memory.user, pages); +++ map->memory.pages -= pages; +++} +++ +++static int bpf_map_alloc_id(struct bpf_map *map) +++{ +++ int id; +++ +++ idr_preload(GFP_KERNEL); +++ spin_lock_bh(&map_idr_lock); +++ id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC); +++ if (id > 0) +++ map->id = id; +++ spin_unlock_bh(&map_idr_lock); +++ idr_preload_end(); +++ +++ if (WARN_ON_ONCE(!id)) +++ return -ENOSPC; +++ +++ return id > 0 ? 0 : id; +++} +++ +++void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock) +++{ +++ unsigned long flags; +++ +++ /* Offloaded maps are removed from the IDR store when their device +++ * disappears - even if someone holds an fd to them they are unusable, +++ * the memory is gone, all ops will fail; they are simply waiting for +++ * refcnt to drop to be freed. 
+++ */ +++ if (!map->id) +++ return; +++ +++ if (do_idr_lock) +++ spin_lock_irqsave(&map_idr_lock, flags); +++ else +++ __acquire(&map_idr_lock); +++ +++ idr_remove(&map_idr, map->id); +++ map->id = 0; +++ +++ if (do_idr_lock) +++ spin_unlock_irqrestore(&map_idr_lock, flags); +++ else +++ __release(&map_idr_lock); ++ } ++ ++ /* called from workqueue */ ++ static void bpf_map_free_deferred(struct work_struct *work) ++ { ++ struct bpf_map *map = container_of(work, struct bpf_map, work); +++ struct bpf_map_memory mem; ++ ++- bpf_map_uncharge_memlock(map); +++ bpf_map_charge_move(&mem, &map->memory); ++ /* implementation dependent freeing */ ++ map->ops->map_free(map); +++ bpf_map_charge_finish(&mem); ++ } ++ ++ static void bpf_map_put_uref(struct bpf_map *map) ++ { ++ if (atomic_dec_and_test(&map->usercnt)) { ++- if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) ++- bpf_fd_array_map_clear(map); +++ if (map->ops->map_release_uref) +++ map->ops->map_release_uref(map); ++ } ++ } ++ ++ /* decrement map refcnt and schedule it for freeing via workqueue ++ * (unrelying map implementation ops->map_free() might sleep) ++ */ ++-void bpf_map_put(struct bpf_map *map) +++static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock) ++ { ++ if (atomic_dec_and_test(&map->refcnt)) { +++ /* bpf_map_free_id() must be called first */ +++ bpf_map_free_id(map, do_idr_lock); +++ btf_put(map->btf); ++ INIT_WORK(&map->work, bpf_map_free_deferred); ++ schedule_work(&map->work); ++ } ++ } ++ +++void bpf_map_put(struct bpf_map *map) +++{ +++ __bpf_map_put(map, true); +++} +++EXPORT_SYMBOL_GPL(bpf_map_put); +++ ++ void bpf_map_put_with_uref(struct bpf_map *map) ++ { ++ bpf_map_put_uref(map); ++@@ -109,18 +336,110 @@ void bpf_map_put_with_uref(struct bpf_ma ++ ++ static int bpf_map_release(struct inode *inode, struct file *filp) ++ { ++- bpf_map_put_with_uref(filp->private_data); +++ struct bpf_map *map = filp->private_data; +++ +++ if (map->ops->map_release) +++ map->ops->map_release(map, filp); +++ +++ bpf_map_put_with_uref(map); ++ return 0; ++ } ++ ++-static const struct file_operations bpf_map_fops = { ++- .release = bpf_map_release, +++static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f) +++{ +++ fmode_t mode = f.file->f_mode; +++ +++ /* Our file permissions may have been overridden by global +++ * map permissions facing syscall side. 
+++ */ +++ if (READ_ONCE(map->frozen)) +++ mode &= ~FMODE_CAN_WRITE; +++ return mode; +++} +++ +++#ifdef CONFIG_PROC_FS +++static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) +++{ +++ const struct bpf_map *map = filp->private_data; +++ const struct bpf_array *array; +++ u32 owner_prog_type = 0; +++ u32 owner_jited = 0; +++ +++ if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { +++ array = container_of(map, struct bpf_array, map); +++ owner_prog_type = array->owner_prog_type; +++ owner_jited = array->owner_jited; +++ } +++ +++ seq_printf(m, +++ "map_type:\t%u\n" +++ "key_size:\t%u\n" +++ "value_size:\t%u\n" +++ "max_entries:\t%u\n" +++ "map_flags:\t%#x\n" +++ "memlock:\t%llu\n" +++ "map_id:\t%u\n" +++ "frozen:\t%u\n", +++ map->map_type, +++ map->key_size, +++ map->value_size, +++ map->max_entries, +++ map->map_flags, +++ map->memory.pages * 1ULL << PAGE_SHIFT, +++ map->id, +++ READ_ONCE(map->frozen)); +++ +++ if (owner_prog_type) { +++ seq_printf(m, "owner_prog_type:\t%u\n", +++ owner_prog_type); +++ seq_printf(m, "owner_jited:\t%u\n", +++ owner_jited); +++ } +++} +++#endif +++ +++static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz, +++ loff_t *ppos) +++{ +++ /* We need this handler such that alloc_file() enables +++ * f_mode with FMODE_CAN_READ. +++ */ +++ return -EINVAL; +++} +++ +++static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf, +++ size_t siz, loff_t *ppos) +++{ +++ /* We need this handler such that alloc_file() enables +++ * f_mode with FMODE_CAN_WRITE. +++ */ +++ return -EINVAL; +++} +++ +++const struct file_operations bpf_map_fops = { +++#ifdef CONFIG_PROC_FS +++ .show_fdinfo = bpf_map_show_fdinfo, +++#endif +++ .release = bpf_map_release, +++ .read = bpf_dummy_read, +++ .write = bpf_dummy_write, ++ }; ++ ++-int bpf_map_new_fd(struct bpf_map *map) +++int bpf_map_new_fd(struct bpf_map *map, int flags) ++ { ++ return anon_inode_getfd("bpf-map", &bpf_map_fops, map, ++- O_RDWR | O_CLOEXEC); +++ flags | O_CLOEXEC); +++} +++ +++int bpf_get_file_flag(int flags) +++{ +++ if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY)) +++ return -EINVAL; +++ if (flags & BPF_F_RDONLY) +++ return O_RDONLY; +++ if (flags & BPF_F_WRONLY) +++ return O_WRONLY; +++ return O_RDWR; ++ } ++ ++ /* helper macro to check that unused fields 'union bpf_attr' are zero */ ++@@ -131,38 +450,171 @@ int bpf_map_new_fd(struct bpf_map *map) ++ offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ ++ sizeof(attr->CMD##_LAST_FIELD)) != NULL ++ ++-#define BPF_MAP_CREATE_LAST_FIELD max_entries +++/* dst and src must have at least BPF_OBJ_NAME_LEN number of bytes. +++ * Return 0 on success and < 0 on error. +++ */ +++static int bpf_obj_name_cpy(char *dst, const char *src) +++{ +++ const char *end = src + BPF_OBJ_NAME_LEN; +++ +++ memset(dst, 0, BPF_OBJ_NAME_LEN); +++ /* Copy all isalnum(), '_' and '.' chars. 
*/ +++ while (src < end && *src) { +++ if (!isalnum(*src) && +++ *src != '_' && *src != '.') +++ return -EINVAL; +++ *dst++ = *src++; +++ } +++ +++ /* No '\0' found in BPF_OBJ_NAME_LEN number of bytes */ +++ if (src == end) +++ return -EINVAL; +++ +++ return 0; +++} +++ +++int map_check_no_btf(const struct bpf_map *map, +++ const struct btf *btf, +++ const struct btf_type *key_type, +++ const struct btf_type *value_type) +++{ +++ return -ENOTSUPP; +++} +++ +++static int map_check_btf(struct bpf_map *map, const struct btf *btf, +++ u32 btf_key_id, u32 btf_value_id) +++{ +++ const struct btf_type *key_type, *value_type; +++ u32 key_size, value_size; +++ int ret = 0; +++ +++ /* Some maps allow key to be unspecified. */ +++ if (btf_key_id) { +++ key_type = btf_type_id_size(btf, &btf_key_id, &key_size); +++ if (!key_type || key_size != map->key_size) +++ return -EINVAL; +++ } else { +++ key_type = btf_type_by_id(btf, 0); +++ if (!map->ops->map_check_btf) +++ return -EINVAL; +++ } +++ +++ value_type = btf_type_id_size(btf, &btf_value_id, &value_size); +++ if (!value_type || value_size != map->value_size) +++ return -EINVAL; +++ +++ map->spin_lock_off = btf_find_spin_lock(btf, value_type); +++ +++ if (map_value_has_spin_lock(map)) { +++ if (map->map_flags & BPF_F_RDONLY_PROG) +++ return -EACCES; +++ if (map->map_type != BPF_MAP_TYPE_HASH && +++ map->map_type != BPF_MAP_TYPE_ARRAY && +++ map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && +++ map->map_type != BPF_MAP_TYPE_SK_STORAGE) +++ return -ENOTSUPP; +++ if (map->spin_lock_off + sizeof(struct bpf_spin_lock) > +++ map->value_size) { +++ WARN_ONCE(1, +++ "verifier bug spin_lock_off %d value_size %d\n", +++ map->spin_lock_off, map->value_size); +++ return -EFAULT; +++ } +++ } +++ +++ if (map->ops->map_check_btf) +++ ret = map->ops->map_check_btf(map, btf, key_type, value_type); +++ +++ return ret; +++} +++ +++#define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id ++ /* called via syscall */ ++ static int map_create(union bpf_attr *attr) ++ { +++ int numa_node = bpf_map_attr_numa_node(attr); +++ struct bpf_map_memory mem; ++ struct bpf_map *map; +++ int f_flags; ++ int err; ++ ++ err = CHECK_ATTR(BPF_MAP_CREATE); ++ if (err) ++ return -EINVAL; ++ +++ f_flags = bpf_get_file_flag(attr->map_flags); +++ if (f_flags < 0) +++ return f_flags; +++ +++ if (numa_node != NUMA_NO_NODE && +++ ((unsigned int)numa_node >= nr_node_ids || +++ !node_online(numa_node))) +++ return -EINVAL; +++ ++ /* find map type and init map: hashtable vs rbtree vs bloom vs ... 
*/ ++ map = find_and_alloc_map(attr); ++ if (IS_ERR(map)) ++ return PTR_ERR(map); ++ +++ err = bpf_obj_name_cpy(map->name, attr->map_name); +++ if (err) +++ goto free_map; +++ ++ atomic_set(&map->refcnt, 1); ++ atomic_set(&map->usercnt, 1); ++ ++- err = bpf_map_charge_memlock(map); +++ if (attr->btf_key_type_id || attr->btf_value_type_id) { +++ struct btf *btf; +++ +++ if (!attr->btf_value_type_id) { +++ err = -EINVAL; +++ goto free_map; +++ } +++ +++ btf = btf_get_by_fd(attr->btf_fd); +++ if (IS_ERR(btf)) { +++ err = PTR_ERR(btf); +++ goto free_map; +++ } +++ +++ err = map_check_btf(map, btf, attr->btf_key_type_id, +++ attr->btf_value_type_id); +++ if (err) { +++ btf_put(btf); +++ goto free_map; +++ } +++ +++ map->btf = btf; +++ map->btf_key_type_id = attr->btf_key_type_id; +++ map->btf_value_type_id = attr->btf_value_type_id; +++ } else { +++ map->spin_lock_off = -EINVAL; +++ } +++ +++ err = bpf_map_alloc_id(map); ++ if (err) ++- goto free_map; +++ goto free_map_sec; ++ ++- err = bpf_map_new_fd(map); ++- if (err < 0) ++- /* failed to allocate fd */ ++- goto free_map; +++ err = bpf_map_new_fd(map, f_flags); +++ if (err < 0) { +++ /* failed to allocate fd. +++ * bpf_map_put_with_uref() is needed because the above +++ * bpf_map_alloc_id() has published the map +++ * to the userspace and the userspace may +++ * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID. +++ */ +++ bpf_map_put_with_uref(map); +++ return err; +++ } ++ ++ return err; ++ +++free_map_sec: ++ free_map: +++ btf_put(map->btf); +++ bpf_map_charge_move(&mem, &map->memory); ++ map->ops->map_free(map); +++ bpf_map_charge_finish(&mem); ++ return err; ++ } ++ ++@@ -194,6 +646,7 @@ struct bpf_map *bpf_map_inc(struct bpf_m ++ atomic_inc(&map->usercnt); ++ return map; ++ } +++EXPORT_SYMBOL_GPL(bpf_map_inc); ++ ++ struct bpf_map *bpf_map_get_with_uref(u32 ufd) ++ { ++@@ -210,59 +663,155 @@ struct bpf_map *bpf_map_get_with_uref(u3 ++ return map; ++ } ++ ++-/* helper to convert user pointers passed inside __aligned_u64 fields */ ++-static void __user *u64_to_ptr(__u64 val) +++/* map_idr_lock should have been held */ +++static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, +++ bool uref) +++{ +++ int refold; +++ +++ refold = atomic_fetch_add_unless(&map->refcnt, 1, 0); +++ +++ if (refold >= BPF_MAX_REFCNT) { +++ __bpf_map_put(map, false); +++ return ERR_PTR(-EBUSY); +++ } +++ +++ if (!refold) +++ return ERR_PTR(-ENOENT); +++ +++ if (uref) +++ atomic_inc(&map->usercnt); +++ +++ return map; +++} +++ +++struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map, bool uref) +++{ +++ spin_lock_bh(&map_idr_lock); +++ map = __bpf_map_inc_not_zero(map, uref); +++ spin_unlock_bh(&map_idr_lock); +++ +++ return map; +++} +++EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero); +++ +++int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) +++{ +++ return -ENOTSUPP; +++} +++ +++static void *__bpf_copy_key(void __user *ukey, u64 key_size) ++ { ++- return (void __user *) (unsigned long) val; +++ if (key_size) +++ return memdup_user(ukey, key_size); +++ +++ if (ukey) +++ return ERR_PTR(-EINVAL); +++ +++ return NULL; ++ } ++ ++ /* last field in 'union bpf_attr' used by this command */ ++-#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value +++#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags ++ ++ static int map_lookup_elem(union bpf_attr *attr) ++ { ++- void __user *ukey = u64_to_ptr(attr->key); ++- void __user *uvalue = u64_to_ptr(attr->value); +++ void __user *ukey = u64_to_user_ptr(attr->key); +++ void __user *uvalue = u64_to_user_ptr(attr->value); 
++ int ufd = attr->map_fd; ++ struct bpf_map *map; ++ void *key, *value, *ptr; +++ u32 value_size; ++ struct fd f; ++ int err; ++ ++ if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) ++ return -EINVAL; ++ +++ if (attr->flags & ~BPF_F_LOCK) +++ return -EINVAL; +++ ++ f = fdget(ufd); ++ map = __bpf_map_get(f); ++ if (IS_ERR(map)) ++ return PTR_ERR(map); +++ if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { +++ err = -EPERM; +++ goto err_put; +++ } ++ ++- err = -ENOMEM; ++- key = kmalloc(map->key_size, GFP_USER); ++- if (!key) +++ if ((attr->flags & BPF_F_LOCK) && +++ !map_value_has_spin_lock(map)) { +++ err = -EINVAL; ++ goto err_put; +++ } ++ ++- err = -EFAULT; ++- if (copy_from_user(key, ukey, map->key_size) != 0) ++- goto free_key; +++ key = __bpf_copy_key(ukey, map->key_size); +++ if (IS_ERR(key)) { +++ err = PTR_ERR(key); +++ goto err_put; +++ } +++ +++ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || +++ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || +++ map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) +++ value_size = round_up(map->value_size, 8) * num_possible_cpus(); +++ else if (IS_FD_MAP(map)) +++ value_size = sizeof(u32); +++ else +++ value_size = map->value_size; ++ ++ err = -ENOMEM; ++- value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN); +++ value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); ++ if (!value) ++ goto free_key; ++ ++- rcu_read_lock(); ++- ptr = map->ops->map_lookup_elem(map, key); ++- if (ptr) ++- memcpy(value, ptr, map->value_size); ++- rcu_read_unlock(); +++ preempt_disable(); +++ this_cpu_inc(bpf_prog_active); +++ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || +++ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { +++ err = bpf_percpu_hash_copy(map, key, value); +++ } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { +++ err = bpf_percpu_array_copy(map, key, value); +++ } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { +++ err = bpf_stackmap_copy(map, key, value); +++ } else if (IS_FD_ARRAY(map)) { +++ err = bpf_fd_array_map_lookup_elem(map, key, value); +++ } else if (IS_FD_HASH(map)) { +++ err = bpf_fd_htab_map_lookup_elem(map, key, value); +++ } else if (map->map_type == BPF_MAP_TYPE_QUEUE || +++ map->map_type == BPF_MAP_TYPE_STACK) { +++ err = map->ops->map_peek_elem(map, value); +++ } else { +++ rcu_read_lock(); +++ if (map->ops->map_lookup_elem_sys_only) +++ ptr = map->ops->map_lookup_elem_sys_only(map, key); +++ else +++ ptr = map->ops->map_lookup_elem(map, key); +++ if (IS_ERR(ptr)) { +++ err = PTR_ERR(ptr); +++ } else if (!ptr) { +++ err = -ENOENT; +++ } else { +++ err = 0; +++ if (attr->flags & BPF_F_LOCK) +++ /* lock 'ptr' and copy everything but lock */ +++ copy_map_value_locked(map, value, ptr, true); +++ else +++ copy_map_value(map, value, ptr); +++ /* mask lock, since value wasn't zero inited */ +++ check_and_init_map_lock(map, value); +++ } +++ rcu_read_unlock(); +++ } +++ this_cpu_dec(bpf_prog_active); +++ preempt_enable(); ++ ++- err = -ENOENT; ++- if (!ptr) +++ if (err) ++ goto free_value; ++ ++ err = -EFAULT; ++- if (copy_to_user(uvalue, value, map->value_size) != 0) +++ if (copy_to_user(uvalue, value, value_size) != 0) ++ goto free_value; ++ ++ err = 0; ++@@ -276,15 +825,27 @@ err_put: ++ return err; ++ } ++ +++static void maybe_wait_bpf_programs(struct bpf_map *map) +++{ +++ /* Wait for any running BPF programs to complete so that +++ * userspace, when we return to it, knows that all programs +++ * that could be running use the new map value. 
+++ */ +++ if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS || +++ map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) +++ synchronize_rcu(); +++} +++ ++ #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags ++ ++ static int map_update_elem(union bpf_attr *attr) ++ { ++- void __user *ukey = u64_to_ptr(attr->key); ++- void __user *uvalue = u64_to_ptr(attr->value); +++ void __user *ukey = u64_to_user_ptr(attr->key); +++ void __user *uvalue = u64_to_user_ptr(attr->value); ++ int ufd = attr->map_fd; ++ struct bpf_map *map; ++ void *key, *value; +++ u32 value_size; ++ struct fd f; ++ int err; ++ ++@@ -295,32 +856,79 @@ static int map_update_elem(union bpf_att ++ map = __bpf_map_get(f); ++ if (IS_ERR(map)) ++ return PTR_ERR(map); +++ if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { +++ err = -EPERM; +++ goto err_put; +++ } ++ ++- err = -ENOMEM; ++- key = kmalloc(map->key_size, GFP_USER); ++- if (!key) +++ if ((attr->flags & BPF_F_LOCK) && +++ !map_value_has_spin_lock(map)) { +++ err = -EINVAL; +++ goto err_put; +++ } +++ +++ key = __bpf_copy_key(ukey, map->key_size); +++ if (IS_ERR(key)) { +++ err = PTR_ERR(key); ++ goto err_put; +++ } ++ ++- err = -EFAULT; ++- if (copy_from_user(key, ukey, map->key_size) != 0) ++- goto free_key; +++ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || +++ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || +++ map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) +++ value_size = round_up(map->value_size, 8) * num_possible_cpus(); +++ else +++ value_size = map->value_size; ++ ++ err = -ENOMEM; ++- value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN); +++ value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); ++ if (!value) ++ goto free_key; ++ ++ err = -EFAULT; ++- if (copy_from_user(value, uvalue, map->value_size) != 0) +++ if (copy_from_user(value, uvalue, value_size) != 0) ++ goto free_value; ++ ++- /* eBPF program that use maps are running under rcu_read_lock(), ++- * therefore all map accessors rely on this fact, so do the same here ++- */ ++- rcu_read_lock(); ++- err = map->ops->map_update_elem(map, key, value, attr->flags); ++- rcu_read_unlock(); +++ /* Need to create a kthread, thus must support schedule */ +++ if (map->map_type == BPF_MAP_TYPE_CPUMAP || +++ map->map_type == BPF_MAP_TYPE_SOCKHASH || +++ map->map_type == BPF_MAP_TYPE_SOCKMAP) { +++ err = map->ops->map_update_elem(map, key, value, attr->flags); +++ goto out; +++ } ++ +++ /* must increment bpf_prog_active to avoid kprobe+bpf triggering from +++ * inside bpf map update or delete otherwise deadlocks are possible +++ */ +++ preempt_disable(); +++ __this_cpu_inc(bpf_prog_active); +++ if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || +++ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { +++ err = bpf_percpu_hash_update(map, key, value, attr->flags); +++ } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { +++ err = bpf_percpu_array_update(map, key, value, attr->flags); +++ } else if (IS_FD_ARRAY(map)) { +++ rcu_read_lock(); +++ err = bpf_fd_array_map_update_elem(map, f.file, key, value, +++ attr->flags); +++ rcu_read_unlock(); +++ } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { +++ rcu_read_lock(); +++ err = bpf_fd_htab_map_update_elem(map, f.file, key, value, +++ attr->flags); +++ rcu_read_unlock(); +++ } else if (map->map_type == BPF_MAP_TYPE_QUEUE || +++ map->map_type == BPF_MAP_TYPE_STACK) { +++ err = map->ops->map_push_elem(map, value, attr->flags); +++ } else { +++ rcu_read_lock(); +++ err = map->ops->map_update_elem(map, key, value, attr->flags); +++ rcu_read_unlock(); +++ } +++ 
__this_cpu_dec(bpf_prog_active); +++ preempt_enable(); +++ maybe_wait_bpf_programs(map); +++out: ++ free_value: ++ kfree(value); ++ free_key: ++@@ -334,7 +942,7 @@ err_put: ++ ++ static int map_delete_elem(union bpf_attr *attr) ++ { ++- void __user *ukey = u64_to_ptr(attr->key); +++ void __user *ukey = u64_to_user_ptr(attr->key); ++ int ufd = attr->map_fd; ++ struct bpf_map *map; ++ struct fd f; ++@@ -348,21 +956,25 @@ static int map_delete_elem(union bpf_att ++ map = __bpf_map_get(f); ++ if (IS_ERR(map)) ++ return PTR_ERR(map); ++- ++- err = -ENOMEM; ++- key = kmalloc(map->key_size, GFP_USER); ++- if (!key) +++ if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { +++ err = -EPERM; ++ goto err_put; +++ } ++ ++- err = -EFAULT; ++- if (copy_from_user(key, ukey, map->key_size) != 0) ++- goto free_key; +++ key = __bpf_copy_key(ukey, map->key_size); +++ if (IS_ERR(key)) { +++ err = PTR_ERR(key); +++ goto err_put; +++ } ++ +++ preempt_disable(); +++ __this_cpu_inc(bpf_prog_active); ++ rcu_read_lock(); ++ err = map->ops->map_delete_elem(map, key); ++ rcu_read_unlock(); ++- ++-free_key: +++ __this_cpu_dec(bpf_prog_active); +++ preempt_enable(); +++ maybe_wait_bpf_programs(map); ++ kfree(key); ++ err_put: ++ fdput(f); ++@@ -374,8 +986,8 @@ err_put: ++ ++ static int map_get_next_key(union bpf_attr *attr) ++ { ++- void __user *ukey = u64_to_ptr(attr->key); ++- void __user *unext_key = u64_to_ptr(attr->next_key); +++ void __user *ukey = u64_to_user_ptr(attr->key); +++ void __user *unext_key = u64_to_user_ptr(attr->next_key); ++ int ufd = attr->map_fd; ++ struct bpf_map *map; ++ void *key, *next_key; ++@@ -389,15 +1001,20 @@ static int map_get_next_key(union bpf_at ++ map = __bpf_map_get(f); ++ if (IS_ERR(map)) ++ return PTR_ERR(map); ++- ++- err = -ENOMEM; ++- key = kmalloc(map->key_size, GFP_USER); ++- if (!key) +++ if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { +++ err = -EPERM; ++ goto err_put; +++ } ++ ++- err = -EFAULT; ++- if (copy_from_user(key, ukey, map->key_size) != 0) ++- goto free_key; +++ if (ukey) { +++ key = __bpf_copy_key(ukey, map->key_size); +++ if (IS_ERR(key)) { +++ err = PTR_ERR(key); +++ goto err_put; +++ } +++ } else { +++ key = NULL; +++ } ++ ++ err = -ENOMEM; ++ next_key = kmalloc(map->key_size, GFP_USER); ++@@ -425,77 +1042,126 @@ err_put: ++ return err; ++ } ++ ++-static LIST_HEAD(bpf_prog_types); +++#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value ++ ++-static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) +++static int map_lookup_and_delete_elem(union bpf_attr *attr) ++ { ++- struct bpf_prog_type_list *tl; +++ void __user *ukey = u64_to_user_ptr(attr->key); +++ void __user *uvalue = u64_to_user_ptr(attr->value); +++ int ufd = attr->map_fd; +++ struct bpf_map *map; +++ void *key, *value; +++ u32 value_size; +++ struct fd f; +++ int err; ++ ++- list_for_each_entry(tl, &bpf_prog_types, list_node) { ++- if (tl->type == type) { ++- prog->aux->ops = tl->ops; ++- prog->type = type; ++- return 0; ++- } +++ if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) +++ return -EINVAL; +++ +++ f = fdget(ufd); +++ map = __bpf_map_get(f); +++ if (IS_ERR(map)) +++ return PTR_ERR(map); +++ if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) || +++ !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { +++ err = -EPERM; +++ goto err_put; ++ } ++ ++- return -EINVAL; ++-} +++ key = __bpf_copy_key(ukey, map->key_size); +++ if (IS_ERR(key)) { +++ err = PTR_ERR(key); +++ goto err_put; +++ } ++ ++-void bpf_register_prog_type(struct bpf_prog_type_list *tl) ++-{ ++- 
list_add(&tl->list_node, &bpf_prog_types); +++ value_size = map->value_size; +++ +++ err = -ENOMEM; +++ value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); +++ if (!value) +++ goto free_key; +++ +++ if (map->map_type == BPF_MAP_TYPE_QUEUE || +++ map->map_type == BPF_MAP_TYPE_STACK) { +++ err = map->ops->map_pop_elem(map, value); +++ } else { +++ err = -ENOTSUPP; +++ } +++ +++ if (err) +++ goto free_value; +++ +++ if (copy_to_user(uvalue, value, value_size) != 0) { +++ err = -EFAULT; +++ goto free_value; +++ } +++ +++ err = 0; +++ +++free_value: +++ kfree(value); +++free_key: +++ kfree(key); +++err_put: +++ fdput(f); +++ return err; ++ } ++ ++-/* fixup insn->imm field of bpf_call instructions: ++- * if (insn->imm == BPF_FUNC_map_lookup_elem) ++- * insn->imm = bpf_map_lookup_elem - __bpf_call_base; ++- * else if (insn->imm == BPF_FUNC_map_update_elem) ++- * insn->imm = bpf_map_update_elem - __bpf_call_base; ++- * else ... ++- * ++- * this function is called after eBPF program passed verification ++- */ ++-static void fixup_bpf_calls(struct bpf_prog *prog) +++#define BPF_MAP_FREEZE_LAST_FIELD map_fd +++ +++static int map_freeze(const union bpf_attr *attr) ++ { ++- const struct bpf_func_proto *fn; ++- int i; +++ int err = 0, ufd = attr->map_fd; +++ struct bpf_map *map; +++ struct fd f; ++ ++- for (i = 0; i < prog->len; i++) { ++- struct bpf_insn *insn = &prog->insnsi[i]; +++ if (CHECK_ATTR(BPF_MAP_FREEZE)) +++ return -EINVAL; ++ ++- if (insn->code == (BPF_JMP | BPF_CALL)) { ++- /* we reach here when program has bpf_call instructions ++- * and it passed bpf_check(), means that ++- * ops->get_func_proto must have been supplied, check it ++- */ ++- BUG_ON(!prog->aux->ops->get_func_proto); +++ f = fdget(ufd); +++ map = __bpf_map_get(f); +++ if (IS_ERR(map)) +++ return PTR_ERR(map); +++ if (READ_ONCE(map->frozen)) { +++ err = -EBUSY; +++ goto err_put; +++ } +++ if (!capable(CAP_SYS_ADMIN)) { +++ err = -EPERM; +++ goto err_put; +++ } ++ ++- if (insn->imm == BPF_FUNC_get_route_realm) ++- prog->dst_needed = 1; ++- if (insn->imm == BPF_FUNC_get_prandom_u32) ++- bpf_user_rnd_init_once(); ++- if (insn->imm == BPF_FUNC_tail_call) { ++- /* mark bpf_tail_call as different opcode ++- * to avoid conditional branch in ++- * interpeter for every normal call ++- * and to prevent accidental JITing by ++- * JIT compiler that doesn't support ++- * bpf_tail_call yet ++- */ ++- insn->imm = 0; ++- insn->code |= BPF_X; ++- continue; ++- } +++ WRITE_ONCE(map->frozen, true); +++err_put: +++ fdput(f); +++ return err; +++} ++ ++- fn = prog->aux->ops->get_func_proto(insn->imm); ++- /* all functions that have prototype and verifier allowed ++- * programs to call them, must be real in-kernel functions ++- */ ++- BUG_ON(!fn->func); ++- insn->imm = fn->func - __bpf_call_base; ++- } ++- } +++static const struct bpf_prog_ops * const bpf_prog_types[] = { +++#define BPF_PROG_TYPE(_id, _name) \ +++ [_id] = & _name ## _prog_ops, +++#define BPF_MAP_TYPE(_id, _ops) +++#include +++#undef BPF_PROG_TYPE +++#undef BPF_MAP_TYPE +++}; +++ +++static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) +++{ +++ const struct bpf_prog_ops *ops; +++ +++ if (type >= ARRAY_SIZE(bpf_prog_types)) +++ return -EINVAL; +++ ops = bpf_prog_types[type]; +++ if (!ops) +++ return -EINVAL; +++ +++ if (!bpf_prog_is_dev_bound(prog->aux)) +++ prog->aux->ops = ops; +++ else +++ return -EINVAL; +++ prog->type = type; +++ return 0; ++ } ++ ++ /* drop refcnt on maps used by eBPF program and free auxilary data */ ++@@ -509,19 +1175,39 @@ static void 
free_used_maps(struct bpf_pr ++ kfree(aux->used_maps); ++ } ++ +++int __bpf_prog_charge(struct user_struct *user, u32 pages) +++{ +++ unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; +++ unsigned long user_bufs; +++ +++ if (user) { +++ user_bufs = atomic_long_add_return(pages, &user->locked_vm); +++ if (user_bufs > memlock_limit) { +++ atomic_long_sub(pages, &user->locked_vm); +++ return -EPERM; +++ } +++ } +++ +++ return 0; +++} +++ +++void __bpf_prog_uncharge(struct user_struct *user, u32 pages) +++{ +++ if (user) +++ atomic_long_sub(pages, &user->locked_vm); +++} +++ ++ static int bpf_prog_charge_memlock(struct bpf_prog *prog) ++ { ++ struct user_struct *user = get_current_user(); ++- unsigned long memlock_limit; +++ int ret; ++ ++- memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; ++- ++- atomic_long_add(prog->pages, &user->locked_vm); ++- if (atomic_long_read(&user->locked_vm) > memlock_limit) { ++- atomic_long_sub(prog->pages, &user->locked_vm); +++ ret = __bpf_prog_charge(user, prog->pages); +++ if (ret) { ++ free_uid(user); ++- return -EPERM; +++ return ret; ++ } +++ ++ prog->aux->user = user; ++ return 0; ++ } ++@@ -530,30 +1216,87 @@ static void bpf_prog_uncharge_memlock(st ++ { ++ struct user_struct *user = prog->aux->user; ++ ++- atomic_long_sub(prog->pages, &user->locked_vm); +++ __bpf_prog_uncharge(user, prog->pages); ++ free_uid(user); ++ } ++ ++-static void __prog_put_common(struct rcu_head *rcu) +++static int bpf_prog_alloc_id(struct bpf_prog *prog) +++{ +++ int id; +++ +++ idr_preload(GFP_KERNEL); +++ spin_lock_bh(&prog_idr_lock); +++ id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC); +++ if (id > 0) +++ prog->aux->id = id; +++ spin_unlock_bh(&prog_idr_lock); +++ idr_preload_end(); +++ +++ /* id is in [1, INT_MAX) */ +++ if (WARN_ON_ONCE(!id)) +++ return -ENOSPC; +++ +++ return id > 0 ? 0 : id; +++} +++ +++void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) +++{ +++ /* cBPF to eBPF migrations are currently not in the idr store. +++ * Offloaded programs are removed from the store when their device +++ * disappears - even if someone grabs an fd to them they are unusable, +++ * simply waiting for refcnt to drop to be freed. 
+++ */ +++ if (!prog->aux->id) +++ return; +++ +++ if (do_idr_lock) +++ spin_lock_bh(&prog_idr_lock); +++ else +++ __acquire(&prog_idr_lock); +++ +++ idr_remove(&prog_idr, prog->aux->id); +++ prog->aux->id = 0; +++ +++ if (do_idr_lock) +++ spin_unlock_bh(&prog_idr_lock); +++ else +++ __release(&prog_idr_lock); +++} +++ +++static void __bpf_prog_put_rcu(struct rcu_head *rcu) ++ { ++ struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); ++ +++ kvfree(aux->func_info); ++ free_used_maps(aux); ++ bpf_prog_uncharge_memlock(aux->prog); ++ bpf_prog_free(aux->prog); ++ } ++ ++-/* version of bpf_prog_put() that is called after a grace period */ ++-void bpf_prog_put_rcu(struct bpf_prog *prog) +++static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) +++{ +++ bpf_prog_kallsyms_del_all(prog); +++ btf_put(prog->aux->btf); +++ bpf_prog_free_linfo(prog); +++ +++ if (deferred) +++ call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); +++ else +++ __bpf_prog_put_rcu(&prog->aux->rcu); +++} +++ +++static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) ++ { ++- if (atomic_dec_and_test(&prog->aux->refcnt)) ++- call_rcu(&prog->aux->rcu, __prog_put_common); +++ if (atomic_dec_and_test(&prog->aux->refcnt)) { +++ /* bpf_prog_free_id() must be called first */ +++ bpf_prog_free_id(prog, do_idr_lock); +++ __bpf_prog_put_noref(prog, true); +++ } ++ } ++ ++ void bpf_prog_put(struct bpf_prog *prog) ++ { ++- if (atomic_dec_and_test(&prog->aux->refcnt)) ++- __prog_put_common(&prog->aux->rcu); +++ __bpf_prog_put(prog, true); ++ } ++ EXPORT_SYMBOL_GPL(bpf_prog_put); ++ ++@@ -561,12 +1304,68 @@ static int bpf_prog_release(struct inode ++ { ++ struct bpf_prog *prog = filp->private_data; ++ ++- bpf_prog_put_rcu(prog); +++ bpf_prog_put(prog); ++ return 0; ++ } ++ ++-static const struct file_operations bpf_prog_fops = { ++- .release = bpf_prog_release, +++static void bpf_prog_get_stats(const struct bpf_prog *prog, +++ struct bpf_prog_stats *stats) +++{ +++ u64 nsecs = 0, cnt = 0; +++ int cpu; +++ +++ for_each_possible_cpu(cpu) { +++ const struct bpf_prog_stats *st; +++ unsigned int start; +++ u64 tnsecs, tcnt; +++ +++ st = per_cpu_ptr(prog->aux->stats, cpu); +++ do { +++ start = u64_stats_fetch_begin_irq(&st->syncp); +++ tnsecs = st->nsecs; +++ tcnt = st->cnt; +++ } while (u64_stats_fetch_retry_irq(&st->syncp, start)); +++ nsecs += tnsecs; +++ cnt += tcnt; +++ } +++ stats->nsecs = nsecs; +++ stats->cnt = cnt; +++} +++ +++#ifdef CONFIG_PROC_FS +++static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) +++{ +++ const struct bpf_prog *prog = filp->private_data; +++ char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; +++ struct bpf_prog_stats stats; +++ +++ bpf_prog_get_stats(prog, &stats); +++ bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); +++ seq_printf(m, +++ "prog_type:\t%u\n" +++ "prog_jited:\t%u\n" +++ "prog_tag:\t%s\n" +++ "memlock:\t%llu\n" +++ "prog_id:\t%u\n" +++ "run_time_ns:\t%llu\n" +++ "run_cnt:\t%llu\n", +++ prog->type, +++ prog->jited, +++ prog_tag, +++ prog->pages * 1ULL << PAGE_SHIFT, +++ prog->aux->id, +++ stats.nsecs, +++ stats.cnt); +++} +++#endif +++ +++const struct file_operations bpf_prog_fops = { +++#ifdef CONFIG_PROC_FS +++ .show_fdinfo = bpf_prog_show_fdinfo, +++#endif +++ .release = bpf_prog_release, +++ .read = bpf_dummy_read, +++ .write = bpf_dummy_write, ++ }; ++ ++ int bpf_prog_new_fd(struct bpf_prog *prog) ++@@ -575,7 +1374,7 @@ int bpf_prog_new_fd(struct bpf_prog *pro ++ O_RDWR | O_CLOEXEC); ++ } ++ ++-static struct bpf_prog *__bpf_prog_get(struct 
fd f) +++static struct bpf_prog *____bpf_prog_get(struct fd f) ++ { ++ if (!f.file) ++ return ERR_PTR(-EBADF); ++@@ -587,38 +1386,178 @@ static struct bpf_prog *__bpf_prog_get(s ++ return f.file->private_data; ++ } ++ +++struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) +++{ +++ if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) { +++ atomic_sub(i, &prog->aux->refcnt); +++ return ERR_PTR(-EBUSY); +++ } +++ return prog; +++} +++EXPORT_SYMBOL_GPL(bpf_prog_add); +++ +++void bpf_prog_sub(struct bpf_prog *prog, int i) +++{ +++ /* Only to be used for undoing previous bpf_prog_add() in some +++ * error path. We still know that another entity in our call +++ * path holds a reference to the program, thus atomic_sub() can +++ * be safely used in such cases! +++ */ +++ WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0); +++} +++EXPORT_SYMBOL_GPL(bpf_prog_sub); +++ ++ struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) ++ { ++- if (atomic_inc_return(&prog->aux->refcnt) > BPF_MAX_REFCNT) { ++- atomic_dec(&prog->aux->refcnt); +++ return bpf_prog_add(prog, 1); +++} +++EXPORT_SYMBOL_GPL(bpf_prog_inc); +++ +++/* prog_idr_lock should have been held */ +++struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) +++{ +++ int refold; +++ +++ refold = atomic_fetch_add_unless(&prog->aux->refcnt, 1, 0); +++ +++ if (refold >= BPF_MAX_REFCNT) { +++ __bpf_prog_put(prog, false); ++ return ERR_PTR(-EBUSY); ++ } +++ +++ if (!refold) +++ return ERR_PTR(-ENOENT); +++ ++ return prog; ++ } +++EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); ++ ++-/* called by sockets/tracing/seccomp before attaching program to an event ++- * pairs with bpf_prog_put() ++- */ ++-struct bpf_prog *bpf_prog_get(u32 ufd) +++bool bpf_prog_get_ok(struct bpf_prog *prog, +++ enum bpf_prog_type *attach_type, bool attach_drv) +++{ +++ /* not an attachment, just a refcount inc, always allow */ +++ if (!attach_type) +++ return true; +++ +++ if (prog->type != *attach_type) +++ return false; +++ if (bpf_prog_is_dev_bound(prog->aux) && !attach_drv) +++ return false; +++ +++ return true; +++} +++ +++static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, +++ bool attach_drv) ++ { ++ struct fd f = fdget(ufd); ++ struct bpf_prog *prog; ++ ++- prog = __bpf_prog_get(f); +++ prog = ____bpf_prog_get(f); ++ if (IS_ERR(prog)) ++ return prog; +++ if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) { +++ prog = ERR_PTR(-EINVAL); +++ goto out; +++ } ++ ++ prog = bpf_prog_inc(prog); +++out: ++ fdput(f); ++- ++ return prog; ++ } ++-EXPORT_SYMBOL_GPL(bpf_prog_get); +++ +++struct bpf_prog *bpf_prog_get(u32 ufd) +++{ +++ return __bpf_prog_get(ufd, NULL, false); +++} +++ +++struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, +++ bool attach_drv) +++{ +++ return __bpf_prog_get(ufd, &type, attach_drv); +++} +++EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); +++ +++/* Initially all BPF programs could be loaded w/o specifying +++ * expected_attach_type. Later for some of them specifying expected_attach_type +++ * at load time became required so that program could be validated properly. +++ * Programs of types that are allowed to be loaded both w/ and w/o (for +++ * backward compatibility) expected_attach_type, should have the default attach +++ * type assigned to expected_attach_type for the latter case, so that it can be +++ * validated later at attach time. 
+++ * +++ * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if +++ * prog type requires it but has some attach types that have to be backward +++ * compatible. +++ */ +++static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr) +++{ +++ switch (attr->prog_type) { +++ case BPF_PROG_TYPE_CGROUP_SOCK: +++ /* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't +++ * exist so checking for non-zero is the way to go here. +++ */ +++ if (!attr->expected_attach_type) +++ attr->expected_attach_type = +++ BPF_CGROUP_INET_SOCK_CREATE; +++ break; +++ } +++} +++ +++static int +++bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type, +++ enum bpf_attach_type expected_attach_type) +++{ +++ switch (prog_type) { +++ case BPF_PROG_TYPE_CGROUP_SOCK: +++ switch (expected_attach_type) { +++ case BPF_CGROUP_INET_SOCK_CREATE: +++ case BPF_CGROUP_INET4_POST_BIND: +++ case BPF_CGROUP_INET6_POST_BIND: +++ return 0; +++ default: +++ return -EINVAL; +++ } +++ case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: +++ switch (expected_attach_type) { +++ case BPF_CGROUP_INET4_BIND: +++ case BPF_CGROUP_INET6_BIND: +++ case BPF_CGROUP_INET4_CONNECT: +++ case BPF_CGROUP_INET6_CONNECT: +++ case BPF_CGROUP_UDP4_SENDMSG: +++ case BPF_CGROUP_UDP6_SENDMSG: +++ case BPF_CGROUP_UDP4_RECVMSG: +++ case BPF_CGROUP_UDP6_RECVMSG: +++ return 0; +++ default: +++ return -EINVAL; +++ } +++ case BPF_PROG_TYPE_CGROUP_SKB: +++ switch (expected_attach_type) { +++ case BPF_CGROUP_INET_INGRESS: +++ case BPF_CGROUP_INET_EGRESS: +++ return 0; +++ default: +++ return -EINVAL; +++ } +++ case BPF_PROG_TYPE_CGROUP_SOCKOPT: +++ switch (expected_attach_type) { +++ case BPF_CGROUP_SETSOCKOPT: +++ case BPF_CGROUP_GETSOCKOPT: +++ return 0; +++ default: +++ return -EINVAL; +++ } +++ default: +++ return 0; +++ } +++} ++ ++ /* last field in 'union bpf_attr' used by this command */ ++-#define BPF_PROG_LOAD_LAST_FIELD kern_version +++#define BPF_PROG_LOAD_LAST_FIELD line_info_cnt ++ ++-static int bpf_prog_load(union bpf_attr *attr) +++static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) ++ { ++ enum bpf_prog_type type = attr->prog_type; ++ struct bpf_prog *prog; ++@@ -629,8 +1568,19 @@ static int bpf_prog_load(union bpf_attr ++ if (CHECK_ATTR(BPF_PROG_LOAD)) ++ return -EINVAL; ++ +++ if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | +++ BPF_F_ANY_ALIGNMENT | +++ BPF_F_TEST_STATE_FREQ | +++ BPF_F_TEST_RND_HI32)) +++ return -EINVAL; +++ +++ if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && +++ (attr->prog_flags & BPF_F_ANY_ALIGNMENT) && +++ !capable(CAP_SYS_ADMIN)) +++ return -EPERM; +++ ++ /* copy eBPF program license from user space */ ++- if (strncpy_from_user(license, u64_to_ptr(attr->license), +++ if (strncpy_from_user(license, u64_to_user_ptr(attr->license), ++ sizeof(license) - 1) < 0) ++ return -EFAULT; ++ license[sizeof(license) - 1] = 0; ++@@ -638,30 +1588,36 @@ static int bpf_prog_load(union bpf_attr ++ /* eBPF programs must be GPL compatible to use GPL-ed functions */ ++ is_gpl = license_is_gpl_compatible(license); ++ ++- if (attr->insn_cnt >= BPF_MAXINSNS) ++- return -EINVAL; +++ if (attr->insn_cnt == 0 || +++ attr->insn_cnt > (capable(CAP_SYS_ADMIN) ? 
BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) +++ return -E2BIG; +++ if (type != BPF_PROG_TYPE_SOCKET_FILTER && +++ type != BPF_PROG_TYPE_CGROUP_SKB && +++ !capable(CAP_SYS_ADMIN)) +++ return -EPERM; ++ ++- if (type == BPF_PROG_TYPE_KPROBE && ++- attr->kern_version != LINUX_VERSION_CODE) +++ bpf_prog_load_fixup_attach_type(attr); +++ if (bpf_prog_load_check_attach_type(type, attr->expected_attach_type)) ++ return -EINVAL; ++ ++- if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN)) ++- return -EPERM; ++- ++ /* plain bpf_prog allocation */ ++ prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); ++ if (!prog) ++ return -ENOMEM; ++ +++ prog->expected_attach_type = attr->expected_attach_type; +++ +++ prog->aux->offload_requested = !!attr->prog_ifindex; +++ ++ err = bpf_prog_charge_memlock(prog); ++ if (err) ++- goto free_prog_nouncharge; +++ goto free_prog_sec; ++ ++ prog->len = attr->insn_cnt; ++ ++ err = -EFAULT; ++- if (copy_from_user(prog->insns, u64_to_ptr(attr->insns), ++- prog->len * sizeof(struct bpf_insn)) != 0) +++ if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns), +++ bpf_prog_insn_size(prog)) != 0) ++ goto free_prog; ++ ++ prog->orig_prog = NULL; ++@@ -675,91 +1631,720 @@ static int bpf_prog_load(union bpf_attr ++ if (err < 0) ++ goto free_prog; ++ +++ prog->aux->load_time = ktime_get_boot_ns(); +++ err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name); +++ if (err) +++ goto free_prog; +++ ++ /* run eBPF verifier */ ++- err = bpf_check(&prog, attr); +++ err = bpf_check(&prog, attr, uattr); ++ if (err < 0) ++ goto free_used_maps; ++ ++- /* fixup BPF_CALL->imm field */ ++- fixup_bpf_calls(prog); ++- ++- /* eBPF program is ready to be JITed */ ++- err = bpf_prog_select_runtime(prog); +++ prog = bpf_prog_select_runtime(prog, &err); ++ if (err < 0) ++ goto free_used_maps; ++ ++- err = bpf_prog_new_fd(prog); ++- if (err < 0) ++- /* failed to allocate fd */ +++ err = bpf_prog_alloc_id(prog); +++ if (err) ++ goto free_used_maps; ++ +++ /* Upon success of bpf_prog_alloc_id(), the BPF prog is +++ * effectively publicly exposed. However, retrieving via +++ * bpf_prog_get_fd_by_id() will take another reference, +++ * therefore it cannot be gone underneath us. +++ * +++ * Only for the time /after/ successful bpf_prog_new_fd() +++ * and before returning to userspace, we might just hold +++ * one reference and any parallel close on that fd could +++ * rip everything out. Hence, below notifications must +++ * happen before bpf_prog_new_fd(). +++ * +++ * Also, any failure handling from this point onwards must +++ * be using bpf_prog_put() given the program is exposed. +++ */ +++ bpf_prog_kallsyms_add(prog); +++ +++ err = bpf_prog_new_fd(prog); +++ if (err < 0) +++ bpf_prog_put(prog); ++ return err; ++ ++ free_used_maps: ++- free_used_maps(prog->aux); +++ /* In case we have subprogs, we need to wait for a grace +++ * period before we can tear down JIT memory since symbols +++ * are already exposed under kallsyms. 
+++ */ +++ __bpf_prog_put_noref(prog, prog->aux->func_cnt); +++ return err; ++ free_prog: ++ bpf_prog_uncharge_memlock(prog); ++-free_prog_nouncharge: +++free_prog_sec: ++ bpf_prog_free(prog); ++ return err; ++ } ++ ++-#define BPF_OBJ_LAST_FIELD bpf_fd +++#define BPF_OBJ_LAST_FIELD file_flags ++ ++ static int bpf_obj_pin(const union bpf_attr *attr) ++ { ++- if (CHECK_ATTR(BPF_OBJ)) +++ if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0) ++ return -EINVAL; ++ ++- return bpf_obj_pin_user(attr->bpf_fd, u64_to_ptr(attr->pathname)); +++ return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname)); ++ } ++ ++ static int bpf_obj_get(const union bpf_attr *attr) ++ { ++- if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0) +++ if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 || +++ attr->file_flags & ~BPF_OBJ_FLAG_MASK) ++ return -EINVAL; ++ ++- return bpf_obj_get_user(u64_to_ptr(attr->pathname)); +++ return bpf_obj_get_user(u64_to_user_ptr(attr->pathname), +++ attr->file_flags); ++ } ++ ++-SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) +++ +++#define BPF_PROG_ATTACH_LAST_FIELD attach_flags +++ +++#define BPF_F_ATTACH_MASK \ +++ (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI) +++ +++ +++#define BPF_PROG_DETACH_LAST_FIELD attach_type +++ +++ +++#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt +++ +++ +++#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out +++ +++static int bpf_prog_test_run(const union bpf_attr *attr, +++ union bpf_attr __user *uattr) ++ { ++- union bpf_attr attr = {}; ++- int err; +++ struct bpf_prog *prog; +++ int ret = -ENOTSUPP; ++ ++- if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled) +++ if (!capable(CAP_SYS_ADMIN)) ++ return -EPERM; +++ if (CHECK_ATTR(BPF_PROG_TEST_RUN)) +++ return -EINVAL; +++ +++ if ((attr->test.ctx_size_in && !attr->test.ctx_in) || +++ (!attr->test.ctx_size_in && attr->test.ctx_in)) +++ return -EINVAL; +++ +++ if ((attr->test.ctx_size_out && !attr->test.ctx_out) || +++ (!attr->test.ctx_size_out && attr->test.ctx_out)) +++ return -EINVAL; +++ +++ prog = bpf_prog_get(attr->test.prog_fd); +++ if (IS_ERR(prog)) +++ return PTR_ERR(prog); +++ +++ if (prog->aux->ops->test_run) +++ ret = prog->aux->ops->test_run(prog, attr, uattr); +++ +++ bpf_prog_put(prog); +++ return ret; +++} +++ +++#define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id +++ +++static int bpf_obj_get_next_id(const union bpf_attr *attr, +++ union bpf_attr __user *uattr, +++ struct idr *idr, +++ spinlock_t *lock) +++{ +++ u32 next_id = attr->start_id; +++ int err = 0; +++ +++ if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX) +++ return -EINVAL; +++ +++ if (!capable(CAP_SYS_ADMIN)) +++ return -EPERM; +++ +++ next_id++; +++ spin_lock_bh(lock); +++ if (!idr_get_next(idr, &next_id)) +++ err = -ENOENT; +++ spin_unlock_bh(lock); +++ +++ if (!err) +++ err = put_user(next_id, &uattr->next_id); +++ +++ return err; +++} +++ +++#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id +++ +++static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) +++{ +++ struct bpf_prog *prog; +++ u32 id = attr->prog_id; +++ int fd; +++ +++ if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID)) +++ return -EINVAL; +++ +++ if (!capable(CAP_SYS_ADMIN)) +++ return -EPERM; +++ +++ spin_lock_bh(&prog_idr_lock); +++ prog = idr_find(&prog_idr, id); +++ if (prog) +++ prog = bpf_prog_inc_not_zero(prog); +++ else +++ prog = ERR_PTR(-ENOENT); +++ spin_unlock_bh(&prog_idr_lock); +++ +++ if (IS_ERR(prog)) +++ return PTR_ERR(prog); +++ +++ fd = bpf_prog_new_fd(prog); +++ if (fd < 0) +++ bpf_prog_put(prog); +++ +++ 
return fd; +++} +++ +++#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags +++ +++static int bpf_map_get_fd_by_id(const union bpf_attr *attr) +++{ +++ struct bpf_map *map; +++ u32 id = attr->map_id; +++ int f_flags; +++ int fd; +++ +++ if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) || +++ attr->open_flags & ~BPF_OBJ_FLAG_MASK) +++ return -EINVAL; +++ +++ if (!capable(CAP_SYS_ADMIN)) +++ return -EPERM; +++ +++ f_flags = bpf_get_file_flag(attr->open_flags); +++ if (f_flags < 0) +++ return f_flags; +++ +++ spin_lock_bh(&map_idr_lock); +++ map = idr_find(&map_idr, id); +++ if (map) +++ map = __bpf_map_inc_not_zero(map, true); +++ else +++ map = ERR_PTR(-ENOENT); +++ spin_unlock_bh(&map_idr_lock); +++ +++ if (IS_ERR(map)) +++ return PTR_ERR(map); ++ ++- if (!access_ok(VERIFY_READ, uattr, 1)) +++ fd = bpf_map_new_fd(map, f_flags); +++ if (fd < 0) +++ bpf_map_put_with_uref(map); +++ +++ return fd; +++} +++ +++static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, +++ unsigned long addr, u32 *off, +++ u32 *type) +++{ +++ const struct bpf_map *map; +++ int i; +++ +++ for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) { +++ map = prog->aux->used_maps[i]; +++ if (map == (void *)addr) { +++ *type = BPF_PSEUDO_MAP_FD; +++ return map; +++ } +++ if (!map->ops->map_direct_value_meta) +++ continue; +++ if (!map->ops->map_direct_value_meta(map, addr, off)) { +++ *type = BPF_PSEUDO_MAP_VALUE; +++ return map; +++ } +++ } +++ +++ return NULL; +++} +++ +++static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog, +++ const struct cred *f_cred) +++{ +++ const struct bpf_map *map; +++ struct bpf_insn *insns; +++ u32 off, type; +++ u64 imm; +++ int i; +++ +++ insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog), +++ GFP_USER); +++ if (!insns) +++ return insns; +++ +++ for (i = 0; i < prog->len; i++) { +++ if (insns[i].code == (BPF_JMP | BPF_TAIL_CALL)) { +++ insns[i].code = BPF_JMP | BPF_CALL; +++ insns[i].imm = BPF_FUNC_tail_call; +++ /* fall-through */ +++ } +++ if (insns[i].code == (BPF_JMP | BPF_CALL) || +++ insns[i].code == (BPF_JMP | BPF_CALL_ARGS)) { +++ if (insns[i].code == (BPF_JMP | BPF_CALL_ARGS)) +++ insns[i].code = BPF_JMP | BPF_CALL; +++ if (!bpf_dump_raw_ok(f_cred)) +++ insns[i].imm = 0; +++ continue; +++ } +++ +++ if (insns[i].code != (BPF_LD | BPF_IMM | BPF_DW)) +++ continue; +++ +++ imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; +++ map = bpf_map_from_imm(prog, imm, &off, &type); +++ if (map) { +++ insns[i].src_reg = type; +++ insns[i].imm = map->id; +++ insns[i + 1].imm = off; +++ continue; +++ } +++ } +++ +++ return insns; +++} +++ +++static int set_info_rec_size(struct bpf_prog_info *info) +++{ +++ /* +++ * Ensure info.*_rec_size is the same as kernel expected size +++ * +++ * or +++ * +++ * Only allow zero *_rec_size if both _rec_size and _cnt are +++ * zero. In this case, the kernel will set the expected +++ * _rec_size back to the info. 
+++ */ +++ +++ if ((info->nr_func_info || info->func_info_rec_size) && +++ info->func_info_rec_size != sizeof(struct bpf_func_info)) +++ return -EINVAL; +++ +++ if ((info->nr_line_info || info->line_info_rec_size) && +++ info->line_info_rec_size != sizeof(struct bpf_line_info)) +++ return -EINVAL; +++ +++ if ((info->nr_jited_line_info || info->jited_line_info_rec_size) && +++ info->jited_line_info_rec_size != sizeof(__u64)) +++ return -EINVAL; +++ +++ info->func_info_rec_size = sizeof(struct bpf_func_info); +++ info->line_info_rec_size = sizeof(struct bpf_line_info); +++ info->jited_line_info_rec_size = sizeof(__u64); +++ +++ return 0; +++} +++ +++static int bpf_prog_get_info_by_fd(struct file *file, +++ struct bpf_prog *prog, +++ const union bpf_attr *attr, +++ union bpf_attr __user *uattr) +++{ +++ struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); +++ struct bpf_prog_info info; +++ u32 info_len = attr->info.info_len; +++ struct bpf_prog_stats stats; +++ char __user *uinsns; +++ u32 ulen; +++ int err; +++ +++ err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len); +++ if (err) +++ return err; +++ info_len = min_t(u32, sizeof(info), info_len); +++ +++ memset(&info, 0, sizeof(info)); +++ if (copy_from_user(&info, uinfo, info_len)) ++ return -EFAULT; ++ ++- if (size > PAGE_SIZE) /* silly large */ ++- return -E2BIG; +++ info.type = prog->type; +++ info.id = prog->aux->id; +++ info.load_time = prog->aux->load_time; +++ info.created_by_uid = from_kuid_munged(current_user_ns(), +++ prog->aux->user->uid); +++ info.gpl_compatible = prog->gpl_compatible; +++ +++ memcpy(info.tag, prog->tag, sizeof(prog->tag)); +++ memcpy(info.name, prog->aux->name, sizeof(prog->aux->name)); +++ +++ ulen = info.nr_map_ids; +++ info.nr_map_ids = prog->aux->used_map_cnt; +++ ulen = min_t(u32, info.nr_map_ids, ulen); +++ if (ulen) { +++ u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids); +++ u32 i; +++ +++ for (i = 0; i < ulen; i++) +++ if (put_user(prog->aux->used_maps[i]->id, +++ &user_map_ids[i])) +++ return -EFAULT; +++ } +++ +++ err = set_info_rec_size(&info); +++ if (err) +++ return err; +++ +++ bpf_prog_get_stats(prog, &stats); +++ info.run_time_ns = stats.nsecs; +++ info.run_cnt = stats.cnt; +++ +++ if (!capable(CAP_SYS_ADMIN)) { +++ info.jited_prog_len = 0; +++ info.xlated_prog_len = 0; +++ info.nr_jited_ksyms = 0; +++ info.nr_jited_func_lens = 0; +++ info.nr_func_info = 0; +++ info.nr_line_info = 0; +++ info.nr_jited_line_info = 0; +++ goto done; +++ } +++ +++ ulen = info.xlated_prog_len; +++ info.xlated_prog_len = bpf_prog_insn_size(prog); +++ if (info.xlated_prog_len && ulen) { +++ struct bpf_insn *insns_sanitized; +++ bool fault; +++ +++ if (prog->blinded && !bpf_dump_raw_ok(file->f_cred)) { +++ info.xlated_prog_insns = 0; +++ goto done; +++ } +++ insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred); +++ if (!insns_sanitized) +++ return -ENOMEM; +++ uinsns = u64_to_user_ptr(info.xlated_prog_insns); +++ ulen = min_t(u32, info.xlated_prog_len, ulen); +++ fault = copy_to_user(uinsns, insns_sanitized, ulen); +++ kfree(insns_sanitized); +++ if (fault) +++ return -EFAULT; +++ } +++ +++ /* NOTE: the following code is supposed to be skipped for offload. +++ * bpf_prog_offload_info_fill() is the place to fill similar fields +++ * for offload. 
+++ */ +++ ulen = info.jited_prog_len; +++ if (prog->aux->func_cnt) { +++ u32 i; +++ +++ info.jited_prog_len = 0; +++ for (i = 0; i < prog->aux->func_cnt; i++) +++ info.jited_prog_len += prog->aux->func[i]->jited_len; +++ } else { +++ info.jited_prog_len = prog->jited_len; +++ } +++ +++ if (info.jited_prog_len && ulen) { +++ if (bpf_dump_raw_ok(file->f_cred)) { +++ uinsns = u64_to_user_ptr(info.jited_prog_insns); +++ ulen = min_t(u32, info.jited_prog_len, ulen); +++ +++ /* for multi-function programs, copy the JITed +++ * instructions for all the functions +++ */ +++ if (prog->aux->func_cnt) { +++ u32 len, free, i; +++ u8 *img; +++ +++ free = ulen; +++ for (i = 0; i < prog->aux->func_cnt; i++) { +++ len = prog->aux->func[i]->jited_len; +++ len = min_t(u32, len, free); +++ img = (u8 *) prog->aux->func[i]->bpf_func; +++ if (copy_to_user(uinsns, img, len)) +++ return -EFAULT; +++ uinsns += len; +++ free -= len; +++ if (!free) +++ break; +++ } +++ } else { +++ if (copy_to_user(uinsns, prog->bpf_func, ulen)) +++ return -EFAULT; +++ } +++ } else { +++ info.jited_prog_insns = 0; +++ } +++ } +++ +++ ulen = info.nr_jited_ksyms; +++ info.nr_jited_ksyms = prog->aux->func_cnt ? : 1; +++ if (ulen) { +++ if (bpf_dump_raw_ok(file->f_cred)) { +++ unsigned long ksym_addr; +++ u64 __user *user_ksyms; +++ u32 i; +++ +++ /* copy the address of the kernel symbol +++ * corresponding to each function +++ */ +++ ulen = min_t(u32, info.nr_jited_ksyms, ulen); +++ user_ksyms = u64_to_user_ptr(info.jited_ksyms); +++ if (prog->aux->func_cnt) { +++ for (i = 0; i < ulen; i++) { +++ ksym_addr = (unsigned long) +++ prog->aux->func[i]->bpf_func; +++ if (put_user((u64) ksym_addr, +++ &user_ksyms[i])) +++ return -EFAULT; +++ } +++ } else { +++ ksym_addr = (unsigned long) prog->bpf_func; +++ if (put_user((u64) ksym_addr, &user_ksyms[0])) +++ return -EFAULT; +++ } +++ } else { +++ info.jited_ksyms = 0; +++ } +++ } +++ +++ ulen = info.nr_jited_func_lens; +++ info.nr_jited_func_lens = prog->aux->func_cnt ? : 1; +++ if (ulen) { +++ if (bpf_dump_raw_ok(file->f_cred)) { +++ u32 __user *user_lens; +++ u32 func_len, i; +++ +++ /* copy the JITed image lengths for each function */ +++ ulen = min_t(u32, info.nr_jited_func_lens, ulen); +++ user_lens = u64_to_user_ptr(info.jited_func_lens); +++ if (prog->aux->func_cnt) { +++ for (i = 0; i < ulen; i++) { +++ func_len = +++ prog->aux->func[i]->jited_len; +++ if (put_user(func_len, &user_lens[i])) +++ return -EFAULT; +++ } +++ } else { +++ func_len = prog->jited_len; +++ if (put_user(func_len, &user_lens[0])) +++ return -EFAULT; +++ } +++ } else { +++ info.jited_func_lens = 0; +++ } +++ } +++ +++ if (prog->aux->btf) +++ info.btf_id = btf_id(prog->aux->btf); +++ +++ ulen = info.nr_func_info; +++ info.nr_func_info = prog->aux->func_info_cnt; +++ if (info.nr_func_info && ulen) { +++ char __user *user_finfo; +++ +++ user_finfo = u64_to_user_ptr(info.func_info); +++ ulen = min_t(u32, info.nr_func_info, ulen); +++ if (copy_to_user(user_finfo, prog->aux->func_info, +++ info.func_info_rec_size * ulen)) +++ return -EFAULT; +++ } +++ +++ ulen = info.nr_line_info; +++ info.nr_line_info = prog->aux->nr_linfo; +++ if (info.nr_line_info && ulen) { +++ __u8 __user *user_linfo; +++ +++ user_linfo = u64_to_user_ptr(info.line_info); +++ ulen = min_t(u32, info.nr_line_info, ulen); +++ if (copy_to_user(user_linfo, prog->aux->linfo, +++ info.line_info_rec_size * ulen)) +++ return -EFAULT; +++ } ++ ++- /* If we're handed a bigger struct than we know of, ++- * ensure all the unknown bits are 0 - i.e. 
new ++- * user-space does not rely on any kernel feature ++- * extensions we dont know about yet. ++- */ ++- if (size > sizeof(attr)) { ++- unsigned char __user *addr; ++- unsigned char __user *end; ++- unsigned char val; ++- ++- addr = (void __user *)uattr + sizeof(attr); ++- end = (void __user *)uattr + size; ++- ++- for (; addr < end; addr++) { ++- err = get_user(val, addr); ++- if (err) ++- return err; ++- if (val) ++- return -E2BIG; +++ ulen = info.nr_jited_line_info; +++ if (prog->aux->jited_linfo) +++ info.nr_jited_line_info = prog->aux->nr_linfo; +++ else +++ info.nr_jited_line_info = 0; +++ if (info.nr_jited_line_info && ulen) { +++ if (bpf_dump_raw_ok(file->f_cred)) { +++ __u64 __user *user_linfo; +++ u32 i; +++ +++ user_linfo = u64_to_user_ptr(info.jited_line_info); +++ ulen = min_t(u32, info.nr_jited_line_info, ulen); +++ for (i = 0; i < ulen; i++) { +++ if (put_user((__u64)(long)prog->aux->jited_linfo[i], +++ &user_linfo[i])) +++ return -EFAULT; +++ } +++ } else { +++ info.jited_line_info = 0; ++ } ++- size = sizeof(attr); ++ } ++ +++ ulen = info.nr_prog_tags; +++ info.nr_prog_tags = prog->aux->func_cnt ? : 1; +++ if (ulen) { +++ __u8 __user (*user_prog_tags)[BPF_TAG_SIZE]; +++ u32 i; +++ +++ user_prog_tags = u64_to_user_ptr(info.prog_tags); +++ ulen = min_t(u32, info.nr_prog_tags, ulen); +++ if (prog->aux->func_cnt) { +++ for (i = 0; i < ulen; i++) { +++ if (copy_to_user(user_prog_tags[i], +++ prog->aux->func[i]->tag, +++ BPF_TAG_SIZE)) +++ return -EFAULT; +++ } +++ } else { +++ if (copy_to_user(user_prog_tags[0], +++ prog->tag, BPF_TAG_SIZE)) +++ return -EFAULT; +++ } +++ } +++ +++done: +++ if (copy_to_user(uinfo, &info, info_len) || +++ put_user(info_len, &uattr->info.info_len)) +++ return -EFAULT; +++ +++ return 0; +++} +++ +++static int bpf_map_get_info_by_fd(struct file *file, +++ struct bpf_map *map, +++ const union bpf_attr *attr, +++ union bpf_attr __user *uattr) +++{ +++ struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info); +++ struct bpf_map_info info; +++ u32 info_len = attr->info.info_len; +++ int err; +++ +++ err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len); +++ if (err) +++ return err; +++ info_len = min_t(u32, sizeof(info), info_len); +++ +++ memset(&info, 0, sizeof(info)); +++ info.type = map->map_type; +++ info.id = map->id; +++ info.key_size = map->key_size; +++ info.value_size = map->value_size; +++ info.max_entries = map->max_entries; +++ info.map_flags = map->map_flags; +++ memcpy(info.name, map->name, sizeof(map->name)); +++ +++ if (map->btf) { +++ info.btf_id = btf_id(map->btf); +++ info.btf_key_type_id = map->btf_key_type_id; +++ info.btf_value_type_id = map->btf_value_type_id; +++ } +++ +++ if (copy_to_user(uinfo, &info, info_len) || +++ put_user(info_len, &uattr->info.info_len)) +++ return -EFAULT; +++ +++ return 0; +++} +++ +++static int bpf_btf_get_info_by_fd(struct file *file, +++ struct btf *btf, +++ const union bpf_attr *attr, +++ union bpf_attr __user *uattr) +++{ +++ struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info); +++ u32 info_len = attr->info.info_len; +++ int err; +++ +++ err = bpf_check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len); +++ if (err) +++ return err; +++ +++ return btf_get_info_by_fd(btf, attr, uattr); +++} +++ +++#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info +++ +++static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, +++ union bpf_attr __user *uattr) +++{ +++ int ufd = attr->info.bpf_fd; +++ struct fd f; +++ int err; +++ +++ if 
(CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD)) +++ return -EINVAL; +++ +++ f = fdget(ufd); +++ if (!f.file) +++ return -EBADFD; +++ +++ if (f.file->f_op == &bpf_prog_fops) +++ err = bpf_prog_get_info_by_fd(f.file, f.file->private_data, attr, +++ uattr); +++ else if (f.file->f_op == &bpf_map_fops) +++ err = bpf_map_get_info_by_fd(f.file, f.file->private_data, attr, +++ uattr); +++ else if (f.file->f_op == &btf_fops) +++ err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr); +++ else +++ err = -EINVAL; +++ +++ fdput(f); +++ return err; +++} +++ +++#define BPF_BTF_LOAD_LAST_FIELD btf_log_level +++ +++static int bpf_btf_load(const union bpf_attr *attr) +++{ +++ if (CHECK_ATTR(BPF_BTF_LOAD)) +++ return -EINVAL; +++ +++ if (!capable(CAP_SYS_ADMIN)) +++ return -EPERM; +++ +++ return btf_new_fd(attr); +++} +++ +++#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id +++ +++static int bpf_btf_get_fd_by_id(const union bpf_attr *attr) +++{ +++ if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID)) +++ return -EINVAL; +++ +++ if (!capable(CAP_SYS_ADMIN)) +++ return -EPERM; +++ +++ return btf_get_fd_by_id(attr->btf_id); +++} +++ +++ +++#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr +++ +++SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) +++{ +++ union bpf_attr attr; +++ int err; +++ +++ if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN)) +++ return -EPERM; +++ +++ err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size); +++ if (err) +++ return err; +++ size = min_t(u32, size, sizeof(attr)); +++ ++ /* copy attributes from user space, may be less than sizeof(bpf_attr) */ +++ memset(&attr, 0, sizeof(attr)); ++ if (copy_from_user(&attr, uattr, size) != 0) ++ return -EFAULT; ++ ++@@ -779,8 +2364,11 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf ++ case BPF_MAP_GET_NEXT_KEY: ++ err = map_get_next_key(&attr); ++ break; +++ case BPF_MAP_FREEZE: +++ err = map_freeze(&attr); +++ break; ++ case BPF_PROG_LOAD: ++- err = bpf_prog_load(&attr); +++ err = bpf_prog_load(&attr, uattr); ++ break; ++ case BPF_OBJ_PIN: ++ err = bpf_obj_pin(&attr); ++@@ -788,6 +2376,39 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf ++ case BPF_OBJ_GET: ++ err = bpf_obj_get(&attr); ++ break; +++ case BPF_PROG_TEST_RUN: +++ err = bpf_prog_test_run(&attr, uattr); +++ break; +++ case BPF_PROG_GET_NEXT_ID: +++ err = bpf_obj_get_next_id(&attr, uattr, +++ &prog_idr, &prog_idr_lock); +++ break; +++ case BPF_MAP_GET_NEXT_ID: +++ err = bpf_obj_get_next_id(&attr, uattr, +++ &map_idr, &map_idr_lock); +++ break; +++ case BPF_BTF_GET_NEXT_ID: +++ err = bpf_obj_get_next_id(&attr, uattr, +++ &btf_idr, &btf_idr_lock); +++ break; +++ case BPF_PROG_GET_FD_BY_ID: +++ err = bpf_prog_get_fd_by_id(&attr); +++ break; +++ case BPF_MAP_GET_FD_BY_ID: +++ err = bpf_map_get_fd_by_id(&attr); +++ break; +++ case BPF_OBJ_GET_INFO_BY_FD: +++ err = bpf_obj_get_info_by_fd(&attr, uattr); +++ break; +++ case BPF_BTF_LOAD: +++ err = bpf_btf_load(&attr); +++ break; +++ case BPF_BTF_GET_FD_BY_ID: +++ err = bpf_btf_get_fd_by_id(&attr); +++ break; +++ case BPF_MAP_LOOKUP_AND_DELETE_ELEM: +++ err = map_lookup_and_delete_elem(&attr); +++ break; ++ default: ++ err = -EINVAL; ++ break; ++--- /dev/null +++++ b/kernel/bpf/sysfs_btf.c ++@@ -0,0 +1,45 @@ +++// SPDX-License-Identifier: GPL-2.0 +++/* +++ * Provide kernel BTF information for introspection and use by eBPF tools. 
+++ */ +++#include +++#include +++#include +++#include +++#include +++ +++/* See scripts/link-vmlinux.sh, gen_btf() func for details */ +++extern char __weak __start_BTF[]; +++extern char __weak __stop_BTF[]; +++ +++static ssize_t +++btf_vmlinux_read(struct file *file, struct kobject *kobj, +++ struct bin_attribute *bin_attr, +++ char *buf, loff_t off, size_t len) +++{ +++ memcpy(buf, __start_BTF + off, len); +++ return len; +++} +++ +++static struct bin_attribute bin_attr_btf_vmlinux __ro_after_init = { +++ .attr = { .name = "vmlinux", .mode = 0444, }, +++ .read = btf_vmlinux_read, +++}; +++ +++static struct kobject *btf_kobj; +++ +++static int __init btf_vmlinux_init(void) +++{ +++ bin_attr_btf_vmlinux.size = __stop_BTF - __start_BTF; +++ +++ if (!__start_BTF || bin_attr_btf_vmlinux.size == 0) +++ return 0; +++ +++ btf_kobj = kobject_create_and_add("btf", kernel_kobj); +++ if (!btf_kobj) +++ return -ENOMEM; +++ +++ return sysfs_create_bin_file(btf_kobj, &bin_attr_btf_vmlinux); +++} +++ +++subsys_initcall(btf_vmlinux_init); ++--- /dev/null +++++ b/kernel/bpf/tnum.c ++@@ -0,0 +1,196 @@ +++// SPDX-License-Identifier: GPL-2.0-only +++/* tnum: tracked (or tristate) numbers +++ * +++ * A tnum tracks knowledge about the bits of a value. Each bit can be either +++ * known (0 or 1), or unknown (x). Arithmetic operations on tnums will +++ * propagate the unknown bits such that the tnum result represents all the +++ * possible results for possible values of the operands. +++ */ +++#include +++#include +++ +++#define TNUM(_v, _m) (struct tnum){.value = _v, .mask = _m} +++/* A completely unknown value */ +++const struct tnum tnum_unknown = { .value = 0, .mask = -1 }; +++ +++struct tnum tnum_const(u64 value) +++{ +++ return TNUM(value, 0); +++} +++ +++struct tnum tnum_range(u64 min, u64 max) +++{ +++ u64 chi = min ^ max, delta; +++ u8 bits = fls64(chi); +++ +++ /* special case, needed because 1ULL << 64 is undefined */ +++ if (bits > 63) +++ return tnum_unknown; +++ /* e.g. if chi = 4, bits = 3, delta = (1<<3) - 1 = 7. +++ * if chi = 0, bits = 0, delta = (1<<0) - 1 = 0, so we return +++ * constant min (since min == max). +++ */ +++ delta = (1ULL << bits) - 1; +++ return TNUM(min & ~delta, delta); +++} +++ +++struct tnum tnum_lshift(struct tnum a, u8 shift) +++{ +++ return TNUM(a.value << shift, a.mask << shift); +++} +++ +++struct tnum tnum_rshift(struct tnum a, u8 shift) +++{ +++ return TNUM(a.value >> shift, a.mask >> shift); +++} +++ +++struct tnum tnum_arshift(struct tnum a, u8 min_shift, u8 insn_bitness) +++{ +++ /* if a.value is negative, arithmetic shifting by minimum shift +++ * will have larger negative offset compared to more shifting. +++ * If a.value is nonnegative, arithmetic shifting by minimum shift +++ * will have larger positive offset compare to more shifting. 
+++ */ +++ if (insn_bitness == 32) +++ return TNUM((u32)(((s32)a.value) >> min_shift), +++ (u32)(((s32)a.mask) >> min_shift)); +++ else +++ return TNUM((s64)a.value >> min_shift, +++ (s64)a.mask >> min_shift); +++} +++ +++struct tnum tnum_add(struct tnum a, struct tnum b) +++{ +++ u64 sm, sv, sigma, chi, mu; +++ +++ sm = a.mask + b.mask; +++ sv = a.value + b.value; +++ sigma = sm + sv; +++ chi = sigma ^ sv; +++ mu = chi | a.mask | b.mask; +++ return TNUM(sv & ~mu, mu); +++} +++ +++struct tnum tnum_sub(struct tnum a, struct tnum b) +++{ +++ u64 dv, alpha, beta, chi, mu; +++ +++ dv = a.value - b.value; +++ alpha = dv + a.mask; +++ beta = dv - b.mask; +++ chi = alpha ^ beta; +++ mu = chi | a.mask | b.mask; +++ return TNUM(dv & ~mu, mu); +++} +++ +++struct tnum tnum_and(struct tnum a, struct tnum b) +++{ +++ u64 alpha, beta, v; +++ +++ alpha = a.value | a.mask; +++ beta = b.value | b.mask; +++ v = a.value & b.value; +++ return TNUM(v, alpha & beta & ~v); +++} +++ +++struct tnum tnum_or(struct tnum a, struct tnum b) +++{ +++ u64 v, mu; +++ +++ v = a.value | b.value; +++ mu = a.mask | b.mask; +++ return TNUM(v, mu & ~v); +++} +++ +++struct tnum tnum_xor(struct tnum a, struct tnum b) +++{ +++ u64 v, mu; +++ +++ v = a.value ^ b.value; +++ mu = a.mask | b.mask; +++ return TNUM(v & ~mu, mu); +++} +++ +++/* half-multiply add: acc += (unknown * mask * value). +++ * An intermediate step in the multiply algorithm. +++ */ +++static struct tnum hma(struct tnum acc, u64 value, u64 mask) +++{ +++ while (mask) { +++ if (mask & 1) +++ acc = tnum_add(acc, TNUM(0, value)); +++ mask >>= 1; +++ value <<= 1; +++ } +++ return acc; +++} +++ +++struct tnum tnum_mul(struct tnum a, struct tnum b) +++{ +++ struct tnum acc; +++ u64 pi; +++ +++ pi = a.value * b.value; +++ acc = hma(TNUM(pi, 0), a.mask, b.mask | b.value); +++ return hma(acc, b.mask, a.value); +++} +++ +++/* Note that if a and b disagree - i.e. one has a 'known 1' where the other has +++ * a 'known 0' - this will return a 'known 1' for that bit. 
+++ */ +++struct tnum tnum_intersect(struct tnum a, struct tnum b) +++{ +++ u64 v, mu; +++ +++ v = a.value | b.value; +++ mu = a.mask & b.mask; +++ return TNUM(v & ~mu, mu); +++} +++ +++struct tnum tnum_cast(struct tnum a, u8 size) +++{ +++ a.value &= (1ULL << (size * 8)) - 1; +++ a.mask &= (1ULL << (size * 8)) - 1; +++ return a; +++} +++ +++bool tnum_is_aligned(struct tnum a, u64 size) +++{ +++ if (!size) +++ return true; +++ return !((a.value | a.mask) & (size - 1)); +++} +++ +++bool tnum_in(struct tnum a, struct tnum b) +++{ +++ if (b.mask & ~a.mask) +++ return false; +++ b.value &= ~a.mask; +++ return a.value == b.value; +++} +++ +++int tnum_strn(char *str, size_t size, struct tnum a) +++{ +++ return snprintf(str, size, "(%#llx; %#llx)", a.value, a.mask); +++} +++EXPORT_SYMBOL_GPL(tnum_strn); +++ +++int tnum_sbin(char *str, size_t size, struct tnum a) +++{ +++ size_t n; +++ +++ for (n = 64; n; n--) { +++ if (n < size) { +++ if (a.mask & 1) +++ str[n - 1] = 'x'; +++ else if (a.value & 1) +++ str[n - 1] = '1'; +++ else +++ str[n - 1] = '0'; +++ } +++ a.mask >>= 1; +++ a.value >>= 1; +++ } +++ str[min(size - 1, (size_t)64)] = 0; +++ return 64; +++} ++--- a/kernel/bpf/verifier.c +++++ b/kernel/bpf/verifier.c ++@@ -1,22 +1,36 @@ +++// SPDX-License-Identifier: GPL-2.0-only ++ /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com ++- * ++- * This program is free software; you can redistribute it and/or ++- * modify it under the terms of version 2 of the GNU General Public ++- * License as published by the Free Software Foundation. ++- * ++- * This program is distributed in the hope that it will be useful, but ++- * WITHOUT ANY WARRANTY; without even the implied warranty of ++- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++- * General Public License for more details. +++ * Copyright (c) 2016 Facebook +++ * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io ++ */ +++#include ++ #include ++ #include ++ #include ++ #include +++#include +++#include ++ #include ++ #include ++ #include ++ #include +++#include +++#include +++#include +++#include +++#include +++#include +++ +++#include "disasm.h" +++ +++static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { +++#define BPF_PROG_TYPE(_id, _name) \ +++ [_id] = & _name ## _verifier_ops, +++#define BPF_MAP_TYPE(_id, _ops) +++#include +++#undef BPF_PROG_TYPE +++#undef BPF_MAP_TYPE +++}; ++ ++ /* bpf_check() is a static code analyzer that walks eBPF program ++ * instruction by instruction and updates register/stack state. ++@@ -30,7 +44,7 @@ ++ * - out of bounds or malformed jumps ++ * The second pass is all possible path descent from the 1st insn. ++ * Since it's analyzing all pathes through the program, the length of the ++- * analysis is limited to 32k insn, which may be hit even if total number of +++ * analysis is limited to 64k insn, which may be hit even if total number of ++ * insn is less then 4K, but there are too many branches that change stack/regs. ++ * Number of 'branches to be analyzed' is limited to 1k ++ * ++@@ -58,13 +72,13 @@ ++ * (and -20 constant is saved for further stack bounds checking). ++ * Meaning that this reg is a pointer to stack plus known immediate constant. ++ * ++- * Most of the time the registers have UNKNOWN_VALUE type, which +++ * Most of the time the registers have SCALAR_VALUE type, which ++ * means the register has some value, but it's not a valid pointer. 
++- * (like pointer plus pointer becomes UNKNOWN_VALUE type) +++ * (like pointer plus pointer becomes SCALAR_VALUE type) ++ * ++ * When verifier sees load or store instructions the type of base register ++- * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, FRAME_PTR. These are three pointer ++- * types recognized by check_mem_access() function. +++ * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are +++ * four pointer types recognized by check_mem_access() function. ++ * ++ * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value' ++ * and the range of [ptr, ptr + map's value_size) is accessible. ++@@ -123,346 +137,713 @@ ++ * ++ * After the call R0 is set to return type of the function and registers R1-R5 ++ * are set to NOT_INIT to indicate that they are no longer readable. +++ * +++ * The following reference types represent a potential reference to a kernel +++ * resource which, after first being allocated, must be checked and freed by +++ * the BPF program: +++ * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET +++ * +++ * When the verifier sees a helper call return a reference type, it allocates a +++ * pointer id for the reference and stores it in the current function state. +++ * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into +++ * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type +++ * passes through a NULL-check conditional. For the branch wherein the state is +++ * changed to CONST_IMM, the verifier releases the reference. +++ * +++ * For each helper function that allocates a reference, such as +++ * bpf_sk_lookup_tcp(), there is a corresponding release function, such as +++ * bpf_sk_release(). When a reference type passes into the release function, +++ * the verifier also releases the reference. If any unchecked or unreleased +++ * reference remains at the end of the program, the verifier rejects it. 
++ */ ++ ++-/* types of values stored in eBPF registers */ ++-enum bpf_reg_type { ++- NOT_INIT = 0, /* nothing was written into register */ ++- UNKNOWN_VALUE, /* reg doesn't contain a valid pointer */ ++- PTR_TO_CTX, /* reg points to bpf_context */ ++- CONST_PTR_TO_MAP, /* reg points to struct bpf_map */ ++- PTR_TO_MAP_VALUE, /* reg points to map element value */ ++- PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */ ++- FRAME_PTR, /* reg == frame_pointer */ ++- PTR_TO_STACK, /* reg == frame_pointer + imm */ ++- CONST_IMM, /* constant integer value */ ++-}; ++- ++-struct reg_state { ++- enum bpf_reg_type type; ++- union { ++- /* valid when type == CONST_IMM | PTR_TO_STACK */ ++- int imm; ++- ++- /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | ++- * PTR_TO_MAP_VALUE_OR_NULL ++- */ ++- struct bpf_map *map_ptr; ++- }; ++-}; ++- ++-enum bpf_stack_slot_type { ++- STACK_INVALID, /* nothing was stored in this stack slot */ ++- STACK_SPILL, /* register spilled into stack */ ++- STACK_MISC /* BPF program wrote some data into this slot */ ++-}; ++- ++-#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ ++- ++-/* state of the program: ++- * type of all registers and stack info ++- */ ++-struct verifier_state { ++- struct reg_state regs[MAX_BPF_REG]; ++- u8 stack_slot_type[MAX_BPF_STACK]; ++- struct reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; ++-}; ++- ++-/* linked list of verifier states used to prune search */ ++-struct verifier_state_list { ++- struct verifier_state state; ++- struct verifier_state_list *next; ++-}; ++- ++ /* verifier_state + insn_idx are pushed to stack when branch is encountered */ ++-struct verifier_stack_elem { +++struct bpf_verifier_stack_elem { ++ /* verifer state is 'st' ++ * before processing instruction 'insn_idx' ++ * and after processing instruction 'prev_insn_idx' ++ */ ++- struct verifier_state st; +++ struct bpf_verifier_state st; ++ int insn_idx; ++ int prev_insn_idx; ++- struct verifier_stack_elem *next; +++ struct bpf_verifier_stack_elem *next; ++ }; ++ ++-#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ +++#define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192 +++#define BPF_COMPLEXITY_LIMIT_STATES 64 ++ ++-/* single container for all structs ++- * one verifier_env per bpf_check() call ++- */ ++-struct verifier_env { ++- struct bpf_prog *prog; /* eBPF program being verified */ ++- struct verifier_stack_elem *head; /* stack of verifier states to be processed */ ++- int stack_size; /* number of states to be processed */ ++- struct verifier_state cur_state; /* current verifier state */ ++- struct verifier_state_list **explored_states; /* search pruning optimization */ ++- struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ ++- u32 used_map_cnt; /* number of used maps */ ++- bool allow_ptr_leaks; ++-}; +++#define BPF_MAP_PTR_UNPRIV 1UL +++#define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \ +++ POISON_POINTER_DELTA)) +++#define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV)) ++ ++-/* verbose verifier prints what it's seeing ++- * bpf_check() is called under lock, so no race to access these global vars ++- */ ++-static u32 log_level, log_size, log_len; ++-static char *log_buf; +++static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) +++{ +++ return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON; +++} +++ +++static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux) +++{ +++ return aux->map_state & BPF_MAP_PTR_UNPRIV; +++} +++ 
+++static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux, +++ const struct bpf_map *map, bool unpriv) +++{ +++ BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV); +++ unpriv |= bpf_map_ptr_unpriv(aux); +++ aux->map_state = (unsigned long)map | +++ (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL); +++} +++ +++struct bpf_call_arg_meta { +++ struct bpf_map *map_ptr; +++ bool raw_mode; +++ bool pkt_access; +++ int regno; +++ int access_size; +++ u64 msize_max_value; +++ int ref_obj_id; +++ int func_id; +++}; ++ ++ static DEFINE_MUTEX(bpf_verifier_lock); ++ +++static const struct bpf_line_info * +++find_linfo(const struct bpf_verifier_env *env, u32 insn_off) +++{ +++ const struct bpf_line_info *linfo; +++ const struct bpf_prog *prog; +++ u32 i, nr_linfo; +++ +++ prog = env->prog; +++ nr_linfo = prog->aux->nr_linfo; +++ +++ if (!nr_linfo || insn_off >= prog->len) +++ return NULL; +++ +++ linfo = prog->aux->linfo; +++ for (i = 1; i < nr_linfo; i++) +++ if (insn_off < linfo[i].insn_off) +++ break; +++ +++ return &linfo[i - 1]; +++} +++ +++void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, +++ va_list args) +++{ +++ unsigned int n; +++ +++ n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args); +++ +++ WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1, +++ "verifier log line truncated - local buffer too short\n"); +++ +++ n = min(log->len_total - log->len_used - 1, n); +++ log->kbuf[n] = '\0'; +++ +++ if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1)) +++ log->len_used += n; +++ else +++ log->ubuf = NULL; +++} +++ ++ /* log_level controls verbosity level of eBPF verifier. ++- * verbose() is used to dump the verification trace to the log, so the user ++- * can figure out what's wrong with the program +++ * bpf_verifier_log_write() is used to dump the verification trace to the log, +++ * so the user can figure out what's wrong with the program ++ */ ++-static __printf(1, 2) void verbose(const char *fmt, ...) +++__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, +++ const char *fmt, ...) ++ { ++ va_list args; ++ ++- if (log_level == 0 || log_len >= log_size - 1) +++ if (!bpf_verifier_log_needed(&env->log)) ++ return; ++ ++ va_start(args, fmt); ++- log_len += vscnprintf(log_buf + log_len, log_size - log_len, fmt, args); +++ bpf_verifier_vlog(&env->log, fmt, args); ++ va_end(args); ++ } +++EXPORT_SYMBOL_GPL(bpf_verifier_log_write); +++ +++__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...) +++{ +++ struct bpf_verifier_env *env = private_data; +++ va_list args; +++ +++ if (!bpf_verifier_log_needed(&env->log)) +++ return; +++ +++ va_start(args, fmt); +++ bpf_verifier_vlog(&env->log, fmt, args); +++ va_end(args); +++} +++ +++static const char *ltrim(const char *s) +++{ +++ while (isspace(*s)) +++ s++; +++ +++ return s; +++} +++ +++__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env, +++ u32 insn_off, +++ const char *prefix_fmt, ...) 
+++{ +++ const struct bpf_line_info *linfo; +++ +++ if (!bpf_verifier_log_needed(&env->log)) +++ return; +++ +++ linfo = find_linfo(env, insn_off); +++ if (!linfo || linfo == env->prev_linfo) +++ return; +++ +++ if (prefix_fmt) { +++ va_list args; +++ +++ va_start(args, prefix_fmt); +++ bpf_verifier_vlog(&env->log, prefix_fmt, args); +++ va_end(args); +++ } +++ +++ verbose(env, "%s\n", +++ ltrim(btf_name_by_offset(env->prog->aux->btf, +++ linfo->line_off))); +++ +++ env->prev_linfo = linfo; +++} +++ +++static bool type_is_pkt_pointer(enum bpf_reg_type type) +++{ +++ return type == PTR_TO_PACKET || +++ type == PTR_TO_PACKET_META; +++} +++ +++static bool type_is_sk_pointer(enum bpf_reg_type type) +++{ +++ return type == PTR_TO_SOCKET || +++ type == PTR_TO_SOCK_COMMON || +++ type == PTR_TO_TCP_SOCK || +++ type == PTR_TO_XDP_SOCK; +++} +++ +++static bool reg_type_may_be_null(enum bpf_reg_type type) +++{ +++ return type == PTR_TO_MAP_VALUE_OR_NULL || +++ type == PTR_TO_SOCKET_OR_NULL || +++ type == PTR_TO_SOCK_COMMON_OR_NULL || +++ type == PTR_TO_TCP_SOCK_OR_NULL; +++} +++ +++static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) +++{ +++ return reg->type == PTR_TO_MAP_VALUE && +++ map_value_has_spin_lock(reg->map_ptr); +++} +++ +++static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type) +++{ +++ return type == PTR_TO_SOCKET || +++ type == PTR_TO_SOCKET_OR_NULL || +++ type == PTR_TO_TCP_SOCK || +++ type == PTR_TO_TCP_SOCK_OR_NULL; +++} +++ +++static bool arg_type_may_be_refcounted(enum bpf_arg_type type) +++{ +++ return type == ARG_PTR_TO_SOCK_COMMON; +++} +++ +++/* Determine whether the function releases some resources allocated by another +++ * function call. The first reference type argument will be assumed to be +++ * released by release_reference(). 
+++ */ +++static bool is_release_function(enum bpf_func_id func_id) +++{ +++ return func_id == BPF_FUNC_sk_release; +++} +++ +++static bool is_acquire_function(enum bpf_func_id func_id) +++{ +++ return func_id == BPF_FUNC_sk_lookup_tcp || +++ func_id == BPF_FUNC_sk_lookup_udp || +++ func_id == BPF_FUNC_skc_lookup_tcp; +++} +++ +++static bool is_ptr_cast_function(enum bpf_func_id func_id) +++{ +++ return func_id == BPF_FUNC_tcp_sock || +++ func_id == BPF_FUNC_sk_fullsock; +++} ++ ++ /* string representation of 'enum bpf_reg_type' */ ++ static const char * const reg_type_str[] = { ++ [NOT_INIT] = "?", ++- [UNKNOWN_VALUE] = "inv", +++ [SCALAR_VALUE] = "inv", ++ [PTR_TO_CTX] = "ctx", ++ [CONST_PTR_TO_MAP] = "map_ptr", ++ [PTR_TO_MAP_VALUE] = "map_value", ++ [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null", ++- [FRAME_PTR] = "fp", ++ [PTR_TO_STACK] = "fp", ++- [CONST_IMM] = "imm", +++ [PTR_TO_PACKET] = "pkt", +++ [PTR_TO_PACKET_META] = "pkt_meta", +++ [PTR_TO_PACKET_END] = "pkt_end", +++ [PTR_TO_FLOW_KEYS] = "flow_keys", +++ [PTR_TO_SOCKET] = "sock", +++ [PTR_TO_SOCKET_OR_NULL] = "sock_or_null", +++ [PTR_TO_SOCK_COMMON] = "sock_common", +++ [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", +++ [PTR_TO_TCP_SOCK] = "tcp_sock", +++ [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", +++ [PTR_TO_TP_BUFFER] = "tp_buffer", +++ [PTR_TO_XDP_SOCK] = "xdp_sock", ++ }; ++ ++-static void print_verifier_state(struct verifier_env *env) +++static char slot_type_char[] = { +++ [STACK_INVALID] = '?', +++ [STACK_SPILL] = 'r', +++ [STACK_MISC] = 'm', +++ [STACK_ZERO] = '0', +++}; +++ +++static void print_liveness(struct bpf_verifier_env *env, +++ enum bpf_reg_liveness live) +++{ +++ if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE)) +++ verbose(env, "_"); +++ if (live & REG_LIVE_READ) +++ verbose(env, "r"); +++ if (live & REG_LIVE_WRITTEN) +++ verbose(env, "w"); +++ if (live & REG_LIVE_DONE) +++ verbose(env, "D"); +++} +++ +++static struct bpf_func_state *func(struct bpf_verifier_env *env, +++ const struct bpf_reg_state *reg) ++ { +++ struct bpf_verifier_state *cur = env->cur_state; +++ +++ return cur->frame[reg->frameno]; +++} +++ +++static void print_verifier_state(struct bpf_verifier_env *env, +++ const struct bpf_func_state *state) +++{ +++ const struct bpf_reg_state *reg; ++ enum bpf_reg_type t; ++ int i; ++ +++ if (state->frameno) +++ verbose(env, " frame%d:", state->frameno); ++ for (i = 0; i < MAX_BPF_REG; i++) { ++- t = env->cur_state.regs[i].type; +++ reg = &state->regs[i]; +++ t = reg->type; ++ if (t == NOT_INIT) ++ continue; ++- verbose(" R%d=%s", i, reg_type_str[t]); ++- if (t == CONST_IMM || t == PTR_TO_STACK) ++- verbose("%d", env->cur_state.regs[i].imm); ++- else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE || ++- t == PTR_TO_MAP_VALUE_OR_NULL) ++- verbose("(ks=%d,vs=%d)", ++- env->cur_state.regs[i].map_ptr->key_size, ++- env->cur_state.regs[i].map_ptr->value_size); ++- } ++- for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { ++- if (env->cur_state.stack_slot_type[i] == STACK_SPILL) ++- verbose(" fp%d=%s", -MAX_BPF_STACK + i, ++- reg_type_str[env->cur_state.spilled_regs[i / BPF_REG_SIZE].type]); ++- } ++- verbose("\n"); ++-} ++- ++-static const char *const bpf_class_string[] = { ++- [BPF_LD] = "ld", ++- [BPF_LDX] = "ldx", ++- [BPF_ST] = "st", ++- [BPF_STX] = "stx", ++- [BPF_ALU] = "alu", ++- [BPF_JMP] = "jmp", ++- [BPF_RET] = "BUG", ++- [BPF_ALU64] = "alu64", ++-}; +++ verbose(env, " R%d", i); +++ print_liveness(env, reg->live); +++ verbose(env, "=%s", reg_type_str[t]); +++ if 
(t == SCALAR_VALUE && reg->precise) +++ verbose(env, "P"); +++ if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && +++ tnum_is_const(reg->var_off)) { +++ /* reg->off should be 0 for SCALAR_VALUE */ +++ verbose(env, "%lld", reg->var_off.value + reg->off); +++ } else { +++ verbose(env, "(id=%d", reg->id); +++ if (reg_type_may_be_refcounted_or_null(t)) +++ verbose(env, ",ref_obj_id=%d", reg->ref_obj_id); +++ if (t != SCALAR_VALUE) +++ verbose(env, ",off=%d", reg->off); +++ if (type_is_pkt_pointer(t)) +++ verbose(env, ",r=%d", reg->range); +++ else if (t == CONST_PTR_TO_MAP || +++ t == PTR_TO_MAP_VALUE || +++ t == PTR_TO_MAP_VALUE_OR_NULL) +++ verbose(env, ",ks=%d,vs=%d", +++ reg->map_ptr->key_size, +++ reg->map_ptr->value_size); +++ if (tnum_is_const(reg->var_off)) { +++ /* Typically an immediate SCALAR_VALUE, but +++ * could be a pointer whose offset is too big +++ * for reg->off +++ */ +++ verbose(env, ",imm=%llx", reg->var_off.value); +++ } else { +++ if (reg->smin_value != reg->umin_value && +++ reg->smin_value != S64_MIN) +++ verbose(env, ",smin_value=%lld", +++ (long long)reg->smin_value); +++ if (reg->smax_value != reg->umax_value && +++ reg->smax_value != S64_MAX) +++ verbose(env, ",smax_value=%lld", +++ (long long)reg->smax_value); +++ if (reg->umin_value != 0) +++ verbose(env, ",umin_value=%llu", +++ (unsigned long long)reg->umin_value); +++ if (reg->umax_value != U64_MAX) +++ verbose(env, ",umax_value=%llu", +++ (unsigned long long)reg->umax_value); +++ if (!tnum_is_unknown(reg->var_off)) { +++ char tn_buf[48]; ++ ++-static const char *const bpf_alu_string[16] = { ++- [BPF_ADD >> 4] = "+=", ++- [BPF_SUB >> 4] = "-=", ++- [BPF_MUL >> 4] = "*=", ++- [BPF_DIV >> 4] = "/=", ++- [BPF_OR >> 4] = "|=", ++- [BPF_AND >> 4] = "&=", ++- [BPF_LSH >> 4] = "<<=", ++- [BPF_RSH >> 4] = ">>=", ++- [BPF_NEG >> 4] = "neg", ++- [BPF_MOD >> 4] = "%=", ++- [BPF_XOR >> 4] = "^=", ++- [BPF_MOV >> 4] = "=", ++- [BPF_ARSH >> 4] = "s>>=", ++- [BPF_END >> 4] = "endian", ++-}; +++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); +++ verbose(env, ",var_off=%s", tn_buf); +++ } +++ } +++ verbose(env, ")"); +++ } +++ } +++ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { +++ char types_buf[BPF_REG_SIZE + 1]; +++ bool valid = false; +++ int j; +++ +++ for (j = 0; j < BPF_REG_SIZE; j++) { +++ if (state->stack[i].slot_type[j] != STACK_INVALID) +++ valid = true; +++ types_buf[j] = slot_type_char[ +++ state->stack[i].slot_type[j]]; +++ } +++ types_buf[BPF_REG_SIZE] = 0; +++ if (!valid) +++ continue; +++ verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); +++ print_liveness(env, state->stack[i].spilled_ptr.live); +++ if (state->stack[i].slot_type[0] == STACK_SPILL) { +++ reg = &state->stack[i].spilled_ptr; +++ t = reg->type; +++ verbose(env, "=%s", reg_type_str[t]); +++ if (t == SCALAR_VALUE && reg->precise) +++ verbose(env, "P"); +++ if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) +++ verbose(env, "%lld", reg->var_off.value + reg->off); +++ } else { +++ verbose(env, "=%s", types_buf); +++ } +++ } +++ if (state->acquired_refs && state->refs[0].id) { +++ verbose(env, " refs=%d", state->refs[0].id); +++ for (i = 1; i < state->acquired_refs; i++) +++ if (state->refs[i].id) +++ verbose(env, ",%d", state->refs[i].id); +++ } +++ verbose(env, "\n"); +++} ++ ++-static const char *const bpf_ldst_string[] = { ++- [BPF_W >> 3] = "u32", ++- [BPF_H >> 3] = "u16", ++- [BPF_B >> 3] = "u8", ++- [BPF_DW >> 3] = "u64", ++-}; +++#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE) \ +++static int copy_##NAME##_state(struct 
bpf_func_state *dst, \ +++ const struct bpf_func_state *src) \ +++{ \ +++ if (!src->FIELD) \ +++ return 0; \ +++ if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) { \ +++ /* internal bug, make state invalid to reject the program */ \ +++ memset(dst, 0, sizeof(*dst)); \ +++ return -EFAULT; \ +++ } \ +++ memcpy(dst->FIELD, src->FIELD, \ +++ sizeof(*src->FIELD) * (src->COUNT / SIZE)); \ +++ return 0; \ +++} +++/* copy_reference_state() */ +++COPY_STATE_FN(reference, acquired_refs, refs, 1) +++/* copy_stack_state() */ +++COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE) +++#undef COPY_STATE_FN +++ +++#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE) \ +++static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \ +++ bool copy_old) \ +++{ \ +++ u32 old_size = state->COUNT; \ +++ struct bpf_##NAME##_state *new_##FIELD; \ +++ int slot = size / SIZE; \ +++ \ +++ if (size <= old_size || !size) { \ +++ if (copy_old) \ +++ return 0; \ +++ state->COUNT = slot * SIZE; \ +++ if (!size && old_size) { \ +++ kfree(state->FIELD); \ +++ state->FIELD = NULL; \ +++ } \ +++ return 0; \ +++ } \ +++ new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \ +++ GFP_KERNEL); \ +++ if (!new_##FIELD) \ +++ return -ENOMEM; \ +++ if (copy_old) { \ +++ if (state->FIELD) \ +++ memcpy(new_##FIELD, state->FIELD, \ +++ sizeof(*new_##FIELD) * (old_size / SIZE)); \ +++ memset(new_##FIELD + old_size / SIZE, 0, \ +++ sizeof(*new_##FIELD) * (size - old_size) / SIZE); \ +++ } \ +++ state->COUNT = slot * SIZE; \ +++ kfree(state->FIELD); \ +++ state->FIELD = new_##FIELD; \ +++ return 0; \ +++} +++/* realloc_reference_state() */ +++REALLOC_STATE_FN(reference, acquired_refs, refs, 1) +++/* realloc_stack_state() */ +++REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE) +++#undef REALLOC_STATE_FN +++ +++/* do_check() starts with zero-sized stack in struct bpf_verifier_state to +++ * make it consume minimal amount of memory. check_stack_write() access from +++ * the program calls into realloc_func_state() to grow the stack size. +++ * Note there is a non-zero 'parent' pointer inside bpf_verifier_state +++ * which realloc_stack_state() copies over. It points to previous +++ * bpf_verifier_state which is never reallocated. +++ */ +++static int realloc_func_state(struct bpf_func_state *state, int stack_size, +++ int refs_size, bool copy_old) +++{ +++ int err = realloc_reference_state(state, refs_size, copy_old); +++ if (err) +++ return err; +++ return realloc_stack_state(state, stack_size, copy_old); +++} ++ ++-static const char *const bpf_jmp_string[16] = { ++- [BPF_JA >> 4] = "jmp", ++- [BPF_JEQ >> 4] = "==", ++- [BPF_JGT >> 4] = ">", ++- [BPF_JGE >> 4] = ">=", ++- [BPF_JSET >> 4] = "&", ++- [BPF_JNE >> 4] = "!=", ++- [BPF_JSGT >> 4] = "s>", ++- [BPF_JSGE >> 4] = "s>=", ++- [BPF_CALL >> 4] = "call", ++- [BPF_EXIT >> 4] = "exit", ++-}; +++/* Acquire a pointer id from the env and update the state->refs to include +++ * this new pointer reference. +++ * On success, returns a valid pointer id to associate with the register +++ * On failure, returns a negative errno. 
+++ */ +++static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx) +++{ +++ struct bpf_func_state *state = cur_func(env); +++ int new_ofs = state->acquired_refs; +++ int id, err; ++ ++-static void print_bpf_insn(const struct verifier_env *env, ++- const struct bpf_insn *insn) +++ err = realloc_reference_state(state, state->acquired_refs + 1, true); +++ if (err) +++ return err; +++ id = ++env->id_gen; +++ state->refs[new_ofs].id = id; +++ state->refs[new_ofs].insn_idx = insn_idx; +++ +++ return id; +++} +++ +++/* release function corresponding to acquire_reference_state(). Idempotent. */ +++static int release_reference_state(struct bpf_func_state *state, int ptr_id) ++ { ++- u8 class = BPF_CLASS(insn->code); +++ int i, last_idx; ++ ++- if (class == BPF_ALU || class == BPF_ALU64) { ++- if (BPF_SRC(insn->code) == BPF_X) ++- verbose("(%02x) %sr%d %s %sr%d\n", ++- insn->code, class == BPF_ALU ? "(u32) " : "", ++- insn->dst_reg, ++- bpf_alu_string[BPF_OP(insn->code) >> 4], ++- class == BPF_ALU ? "(u32) " : "", ++- insn->src_reg); ++- else ++- verbose("(%02x) %sr%d %s %s%d\n", ++- insn->code, class == BPF_ALU ? "(u32) " : "", ++- insn->dst_reg, ++- bpf_alu_string[BPF_OP(insn->code) >> 4], ++- class == BPF_ALU ? "(u32) " : "", ++- insn->imm); ++- } else if (class == BPF_STX) { ++- if (BPF_MODE(insn->code) == BPF_MEM) ++- verbose("(%02x) *(%s *)(r%d %+d) = r%d\n", ++- insn->code, ++- bpf_ldst_string[BPF_SIZE(insn->code) >> 3], ++- insn->dst_reg, ++- insn->off, insn->src_reg); ++- else if (BPF_MODE(insn->code) == BPF_XADD) ++- verbose("(%02x) lock *(%s *)(r%d %+d) += r%d\n", ++- insn->code, ++- bpf_ldst_string[BPF_SIZE(insn->code) >> 3], ++- insn->dst_reg, insn->off, ++- insn->src_reg); ++- else ++- verbose("BUG_%02x\n", insn->code); ++- } else if (class == BPF_ST) { ++- if (BPF_MODE(insn->code) != BPF_MEM) { ++- verbose("BUG_st_%02x\n", insn->code); ++- return; ++- } ++- verbose("(%02x) *(%s *)(r%d %+d) = %d\n", ++- insn->code, ++- bpf_ldst_string[BPF_SIZE(insn->code) >> 3], ++- insn->dst_reg, ++- insn->off, insn->imm); ++- } else if (class == BPF_LDX) { ++- if (BPF_MODE(insn->code) != BPF_MEM) { ++- verbose("BUG_ldx_%02x\n", insn->code); ++- return; +++ last_idx = state->acquired_refs - 1; +++ for (i = 0; i < state->acquired_refs; i++) { +++ if (state->refs[i].id == ptr_id) { +++ if (last_idx && i != last_idx) +++ memcpy(&state->refs[i], &state->refs[last_idx], +++ sizeof(*state->refs)); +++ memset(&state->refs[last_idx], 0, sizeof(*state->refs)); +++ state->acquired_refs--; +++ return 0; ++ } ++- verbose("(%02x) r%d = *(%s *)(r%d %+d)\n", ++- insn->code, insn->dst_reg, ++- bpf_ldst_string[BPF_SIZE(insn->code) >> 3], ++- insn->src_reg, insn->off); ++- } else if (class == BPF_LD) { ++- if (BPF_MODE(insn->code) == BPF_ABS) { ++- verbose("(%02x) r0 = *(%s *)skb[%d]\n", ++- insn->code, ++- bpf_ldst_string[BPF_SIZE(insn->code) >> 3], ++- insn->imm); ++- } else if (BPF_MODE(insn->code) == BPF_IND) { ++- verbose("(%02x) r0 = *(%s *)skb[r%d + %d]\n", ++- insn->code, ++- bpf_ldst_string[BPF_SIZE(insn->code) >> 3], ++- insn->src_reg, insn->imm); ++- } else if (BPF_MODE(insn->code) == BPF_IMM && ++- BPF_SIZE(insn->code) == BPF_DW) { ++- /* At this point, we already made sure that the second ++- * part of the ldimm64 insn is accessible. 
++- */ ++- u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; ++- bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD; +++ } +++ return -EINVAL; +++} ++ ++- if (map_ptr && !env->allow_ptr_leaks) ++- imm = 0; +++static int transfer_reference_state(struct bpf_func_state *dst, +++ struct bpf_func_state *src) +++{ +++ int err = realloc_reference_state(dst, src->acquired_refs, false); +++ if (err) +++ return err; +++ err = copy_reference_state(dst, src); +++ if (err) +++ return err; +++ return 0; +++} ++ ++- verbose("(%02x) r%d = 0x%llx\n", insn->code, ++- insn->dst_reg, (unsigned long long)imm); ++- } else { ++- verbose("BUG_ld_%02x\n", insn->code); ++- return; ++- } ++- } else if (class == BPF_JMP) { ++- u8 opcode = BPF_OP(insn->code); +++static void free_func_state(struct bpf_func_state *state) +++{ +++ if (!state) +++ return; +++ kfree(state->refs); +++ kfree(state->stack); +++ kfree(state); +++} ++ ++- if (opcode == BPF_CALL) { ++- verbose("(%02x) call %d\n", insn->code, insn->imm); ++- } else if (insn->code == (BPF_JMP | BPF_JA)) { ++- verbose("(%02x) goto pc%+d\n", ++- insn->code, insn->off); ++- } else if (insn->code == (BPF_JMP | BPF_EXIT)) { ++- verbose("(%02x) exit\n", insn->code); ++- } else if (BPF_SRC(insn->code) == BPF_X) { ++- verbose("(%02x) if r%d %s r%d goto pc%+d\n", ++- insn->code, insn->dst_reg, ++- bpf_jmp_string[BPF_OP(insn->code) >> 4], ++- insn->src_reg, insn->off); ++- } else { ++- verbose("(%02x) if r%d %s 0x%x goto pc%+d\n", ++- insn->code, insn->dst_reg, ++- bpf_jmp_string[BPF_OP(insn->code) >> 4], ++- insn->imm, insn->off); +++static void clear_jmp_history(struct bpf_verifier_state *state) +++{ +++ kfree(state->jmp_history); +++ state->jmp_history = NULL; +++ state->jmp_history_cnt = 0; +++} +++ +++static void free_verifier_state(struct bpf_verifier_state *state, +++ bool free_self) +++{ +++ int i; +++ +++ for (i = 0; i <= state->curframe; i++) { +++ free_func_state(state->frame[i]); +++ state->frame[i] = NULL; +++ } +++ clear_jmp_history(state); +++ if (free_self) +++ kfree(state); +++} +++ +++/* copy verifier state from src to dst growing dst stack space +++ * when necessary to accommodate larger src stack +++ */ +++static int copy_func_state(struct bpf_func_state *dst, +++ const struct bpf_func_state *src) +++{ +++ int err; +++ +++ err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs, +++ false); +++ if (err) +++ return err; +++ memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs)); +++ err = copy_reference_state(dst, src); +++ if (err) +++ return err; +++ return copy_stack_state(dst, src); +++} +++ +++static int copy_verifier_state(struct bpf_verifier_state *dst_state, +++ const struct bpf_verifier_state *src) +++{ +++ struct bpf_func_state *dst; +++ u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt; +++ int i, err; +++ +++ if (dst_state->jmp_history_cnt < src->jmp_history_cnt) { +++ kfree(dst_state->jmp_history); +++ dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER); +++ if (!dst_state->jmp_history) +++ return -ENOMEM; +++ } +++ memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz); +++ dst_state->jmp_history_cnt = src->jmp_history_cnt; +++ +++ /* if dst has more stack frames then src frame, free them */ +++ for (i = src->curframe + 1; i <= dst_state->curframe; i++) { +++ free_func_state(dst_state->frame[i]); +++ dst_state->frame[i] = NULL; +++ } +++ dst_state->speculative = src->speculative; +++ dst_state->curframe = src->curframe; +++ dst_state->active_spin_lock = src->active_spin_lock; +++ 
dst_state->branches = src->branches; +++ dst_state->parent = src->parent; +++ dst_state->first_insn_idx = src->first_insn_idx; +++ dst_state->last_insn_idx = src->last_insn_idx; +++ for (i = 0; i <= src->curframe; i++) { +++ dst = dst_state->frame[i]; +++ if (!dst) { +++ dst = kzalloc(sizeof(*dst), GFP_KERNEL); +++ if (!dst) +++ return -ENOMEM; +++ dst_state->frame[i] = dst; ++ } ++- } else { ++- verbose("(%02x) %s\n", insn->code, bpf_class_string[class]); +++ err = copy_func_state(dst, src->frame[i]); +++ if (err) +++ return err; ++ } +++ return 0; ++ } ++ ++-static int pop_stack(struct verifier_env *env, int *prev_insn_idx) +++static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st) ++ { ++- struct verifier_stack_elem *elem; ++- int insn_idx; +++ while (st) { +++ u32 br = --st->branches; +++ +++ /* WARN_ON(br > 1) technically makes sense here, +++ * but see comment in push_stack(), hence: +++ */ +++ WARN_ONCE((int)br < 0, +++ "BUG update_branch_counts:branches_to_explore=%d\n", +++ br); +++ if (br) +++ break; +++ st = st->parent; +++ } +++} +++ +++static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, +++ int *insn_idx) +++{ +++ struct bpf_verifier_state *cur = env->cur_state; +++ struct bpf_verifier_stack_elem *elem, *head = env->head; +++ int err; ++ ++ if (env->head == NULL) ++- return -1; +++ return -ENOENT; ++ ++- memcpy(&env->cur_state, &env->head->st, sizeof(env->cur_state)); ++- insn_idx = env->head->insn_idx; +++ if (cur) { +++ err = copy_verifier_state(cur, &head->st); +++ if (err) +++ return err; +++ } +++ if (insn_idx) +++ *insn_idx = head->insn_idx; ++ if (prev_insn_idx) ++- *prev_insn_idx = env->head->prev_insn_idx; ++- elem = env->head->next; ++- kfree(env->head); +++ *prev_insn_idx = head->prev_insn_idx; +++ elem = head->next; +++ free_verifier_state(&head->st, false); +++ kfree(head); ++ env->head = elem; ++ env->stack_size--; ++- return insn_idx; +++ return 0; ++ } ++ ++-static struct verifier_state *push_stack(struct verifier_env *env, int insn_idx, ++- int prev_insn_idx) +++static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, +++ int insn_idx, int prev_insn_idx, +++ bool speculative) ++ { ++- struct verifier_stack_elem *elem; +++ struct bpf_verifier_state *cur = env->cur_state; +++ struct bpf_verifier_stack_elem *elem; +++ int err; ++ ++- elem = kmalloc(sizeof(struct verifier_stack_elem), GFP_KERNEL); +++ elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL); ++ if (!elem) ++ goto err; ++ ++- memcpy(&elem->st, &env->cur_state, sizeof(env->cur_state)); ++ elem->insn_idx = insn_idx; ++ elem->prev_insn_idx = prev_insn_idx; ++ elem->next = env->head; ++ env->head = elem; ++ env->stack_size++; ++- if (env->stack_size > 1024) { ++- verbose("BPF program is too complex\n"); +++ err = copy_verifier_state(&elem->st, cur); +++ if (err) +++ goto err; +++ elem->st.speculative |= speculative; +++ if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) { +++ verbose(env, "The sequence of %d jumps is too complex.\n", +++ env->stack_size); ++ goto err; ++ } +++ if (elem->st.parent) { +++ ++elem->st.parent->branches; +++ /* WARN_ON(branches > 2) technically makes sense here, +++ * but +++ * 1. speculative states will bump 'branches' for non-branch +++ * instructions +++ * 2. 
is_state_visited() heuristics may decide not to create +++ * a new state for a sequence of branches and all such current +++ * and cloned states will be pointing to a single parent state +++ * which might have large 'branches' count. +++ */ +++ } ++ return &elem->st; ++ err: +++ free_verifier_state(env->cur_state, true); +++ env->cur_state = NULL; ++ /* pop all elements and return */ ++- while (pop_stack(env, NULL) >= 0); +++ while (!pop_stack(env, NULL, NULL)); ++ return NULL; ++ } ++ ++@@ -471,29 +852,225 @@ static const int caller_saved[CALLER_SAV ++ BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 ++ }; ++ ++-static void init_reg_state(struct reg_state *regs) +++static void __mark_reg_not_init(const struct bpf_verifier_env *env, +++ struct bpf_reg_state *reg); +++ +++/* Mark the unknown part of a register (variable offset or scalar value) as +++ * known to have the value @imm. +++ */ +++static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm) +++{ +++ /* Clear id, off, and union(map_ptr, range) */ +++ memset(((u8 *)reg) + sizeof(reg->type), 0, +++ offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type)); +++ reg->var_off = tnum_const(imm); +++ reg->smin_value = (s64)imm; +++ reg->smax_value = (s64)imm; +++ reg->umin_value = imm; +++ reg->umax_value = imm; +++} +++ +++/* Mark the 'variable offset' part of a register as zero. This should be +++ * used only on registers holding a pointer type. +++ */ +++static void __mark_reg_known_zero(struct bpf_reg_state *reg) +++{ +++ __mark_reg_known(reg, 0); +++} +++ +++static void __mark_reg_const_zero(struct bpf_reg_state *reg) +++{ +++ __mark_reg_known(reg, 0); +++ reg->type = SCALAR_VALUE; +++} +++ +++static void mark_reg_known_zero(struct bpf_verifier_env *env, +++ struct bpf_reg_state *regs, u32 regno) +++{ +++ if (WARN_ON(regno >= MAX_BPF_REG)) { +++ verbose(env, "mark_reg_known_zero(regs, %u)\n", regno); +++ /* Something bad happened, let's kill all regs */ +++ for (regno = 0; regno < MAX_BPF_REG; regno++) +++ __mark_reg_not_init(env, regs + regno); +++ return; +++ } +++ __mark_reg_known_zero(regs + regno); +++} +++ +++static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg) +++{ +++ return type_is_pkt_pointer(reg->type); +++} +++ +++static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg) +++{ +++ return reg_is_pkt_pointer(reg) || +++ reg->type == PTR_TO_PACKET_END; +++} +++ +++/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */ +++static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg, +++ enum bpf_reg_type which) +++{ +++ /* The register can already have a range from prior markings. +++ * This is fine as long as it hasn't been advanced from its +++ * origin. 
+++ */ +++ return reg->type == which && +++ reg->id == 0 && +++ reg->off == 0 && +++ tnum_equals_const(reg->var_off, 0); +++} +++ +++/* Attempts to improve min/max values based on var_off information */ +++static void __update_reg_bounds(struct bpf_reg_state *reg) +++{ +++ /* min signed is max(sign bit) | min(other bits) */ +++ reg->smin_value = max_t(s64, reg->smin_value, +++ reg->var_off.value | (reg->var_off.mask & S64_MIN)); +++ /* max signed is min(sign bit) | max(other bits) */ +++ reg->smax_value = min_t(s64, reg->smax_value, +++ reg->var_off.value | (reg->var_off.mask & S64_MAX)); +++ reg->umin_value = max(reg->umin_value, reg->var_off.value); +++ reg->umax_value = min(reg->umax_value, +++ reg->var_off.value | reg->var_off.mask); +++} +++ +++/* Uses signed min/max values to inform unsigned, and vice-versa */ +++static void __reg_deduce_bounds(struct bpf_reg_state *reg) ++ { +++ /* Learn sign from signed bounds. +++ * If we cannot cross the sign boundary, then signed and unsigned bounds +++ * are the same, so combine. This works even in the negative case, e.g. +++ * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff. +++ */ +++ if (reg->smin_value >= 0 || reg->smax_value < 0) { +++ reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value, +++ reg->umin_value); +++ reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value, +++ reg->umax_value); +++ return; +++ } +++ /* Learn sign from unsigned bounds. Signed bounds cross the sign +++ * boundary, so we must be careful. +++ */ +++ if ((s64)reg->umax_value >= 0) { +++ /* Positive. We can't learn anything from the smin, but smax +++ * is positive, hence safe. +++ */ +++ reg->smin_value = reg->umin_value; +++ reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value, +++ reg->umax_value); +++ } else if ((s64)reg->umin_value < 0) { +++ /* Negative. We can't learn anything from the smax, but smin +++ * is negative, hence safe. +++ */ +++ reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value, +++ reg->umin_value); +++ reg->smax_value = reg->umax_value; +++ } +++} +++ +++/* Attempts to improve var_off based on unsigned min/max information */ +++static void __reg_bound_offset(struct bpf_reg_state *reg) +++{ +++ reg->var_off = tnum_intersect(reg->var_off, +++ tnum_range(reg->umin_value, +++ reg->umax_value)); +++} +++ +++/* Reset the min/max bounds of a register */ +++static void __mark_reg_unbounded(struct bpf_reg_state *reg) +++{ +++ reg->smin_value = S64_MIN; +++ reg->smax_value = S64_MAX; +++ reg->umin_value = 0; +++ reg->umax_value = U64_MAX; +++} +++ +++/* Mark a register as having a completely unknown (scalar) value. */ +++static void __mark_reg_unknown(const struct bpf_verifier_env *env, +++ struct bpf_reg_state *reg) +++{ +++ /* +++ * Clear type, id, off, and union(map_ptr, range) and +++ * padding between 'type' and union +++ */ +++ memset(reg, 0, offsetof(struct bpf_reg_state, var_off)); +++ reg->type = SCALAR_VALUE; +++ reg->var_off = tnum_unknown; +++ reg->frameno = 0; +++ reg->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ? 
+++ true : false; +++ __mark_reg_unbounded(reg); +++} +++ +++static void mark_reg_unknown(struct bpf_verifier_env *env, +++ struct bpf_reg_state *regs, u32 regno) +++{ +++ if (WARN_ON(regno >= MAX_BPF_REG)) { +++ verbose(env, "mark_reg_unknown(regs, %u)\n", regno); +++ /* Something bad happened, let's kill all regs except FP */ +++ for (regno = 0; regno < BPF_REG_FP; regno++) +++ __mark_reg_not_init(env, regs + regno); +++ return; +++ } +++ __mark_reg_unknown(env, regs + regno); +++} +++ +++static void __mark_reg_not_init(const struct bpf_verifier_env *env, +++ struct bpf_reg_state *reg) +++{ +++ __mark_reg_unknown(env, reg); +++ reg->type = NOT_INIT; +++} +++ +++static void mark_reg_not_init(struct bpf_verifier_env *env, +++ struct bpf_reg_state *regs, u32 regno) +++{ +++ if (WARN_ON(regno >= MAX_BPF_REG)) { +++ verbose(env, "mark_reg_not_init(regs, %u)\n", regno); +++ /* Something bad happened, let's kill all regs except FP */ +++ for (regno = 0; regno < BPF_REG_FP; regno++) +++ __mark_reg_not_init(env, regs + regno); +++ return; +++ } +++ __mark_reg_not_init(env, regs + regno); +++} +++ +++#define DEF_NOT_SUBREG (0) +++static void init_reg_state(struct bpf_verifier_env *env, +++ struct bpf_func_state *state) +++{ +++ struct bpf_reg_state *regs = state->regs; ++ int i; ++ ++ for (i = 0; i < MAX_BPF_REG; i++) { ++- regs[i].type = NOT_INIT; ++- regs[i].imm = 0; ++- regs[i].map_ptr = NULL; +++ mark_reg_not_init(env, regs, i); +++ regs[i].live = REG_LIVE_NONE; +++ regs[i].parent = NULL; +++ regs[i].subreg_def = DEF_NOT_SUBREG; ++ } ++ ++ /* frame pointer */ ++- regs[BPF_REG_FP].type = FRAME_PTR; +++ regs[BPF_REG_FP].type = PTR_TO_STACK; +++ mark_reg_known_zero(env, regs, BPF_REG_FP); +++ regs[BPF_REG_FP].frameno = state->frameno; ++ ++ /* 1st arg to a function */ ++ regs[BPF_REG_1].type = PTR_TO_CTX; +++ mark_reg_known_zero(env, regs, BPF_REG_1); ++ } ++ ++-static void mark_reg_unknown_value(struct reg_state *regs, u32 regno) +++#define BPF_MAIN_FUNC (-1) +++static void init_func_state(struct bpf_verifier_env *env, +++ struct bpf_func_state *state, +++ int callsite, int frameno, int subprogno) ++ { ++- BUG_ON(regno >= MAX_BPF_REG); ++- regs[regno].type = UNKNOWN_VALUE; ++- regs[regno].imm = 0; ++- regs[regno].map_ptr = NULL; +++ state->callsite = callsite; +++ state->frameno = frameno; +++ state->subprogno = subprogno; +++ init_reg_state(env, state); ++ } ++ ++ enum reg_arg_type { ++@@ -502,44 +1079,760 @@ enum reg_arg_type { ++ DST_OP_NO_MARK /* same as above, check only, don't mark */ ++ }; ++ ++-static int check_reg_arg(struct reg_state *regs, u32 regno, +++static int cmp_subprogs(const void *a, const void *b) +++{ +++ return ((struct bpf_subprog_info *)a)->start - +++ ((struct bpf_subprog_info *)b)->start; +++} +++ +++static int find_subprog(struct bpf_verifier_env *env, int off) +++{ +++ struct bpf_subprog_info *p; +++ +++ p = bsearch(&off, env->subprog_info, env->subprog_cnt, +++ sizeof(env->subprog_info[0]), cmp_subprogs); +++ if (!p) +++ return -ENOENT; +++ return p - env->subprog_info; +++ +++} +++ +++static int add_subprog(struct bpf_verifier_env *env, int off) +++{ +++ int insn_cnt = env->prog->len; +++ int ret; +++ +++ if (off >= insn_cnt || off < 0) { +++ verbose(env, "call to invalid destination\n"); +++ return -EINVAL; +++ } +++ ret = find_subprog(env, off); +++ if (ret >= 0) +++ return 0; +++ if (env->subprog_cnt >= BPF_MAX_SUBPROGS) { +++ verbose(env, "too many subprograms\n"); +++ return -E2BIG; +++ } +++ env->subprog_info[env->subprog_cnt++].start = off; +++ 
sort(env->subprog_info, env->subprog_cnt, +++ sizeof(env->subprog_info[0]), cmp_subprogs, NULL); +++ return 0; +++} +++ +++static int check_subprogs(struct bpf_verifier_env *env) +++{ +++ int i, ret, subprog_start, subprog_end, off, cur_subprog = 0; +++ struct bpf_subprog_info *subprog = env->subprog_info; +++ struct bpf_insn *insn = env->prog->insnsi; +++ int insn_cnt = env->prog->len; +++ +++ /* Add entry function. */ +++ ret = add_subprog(env, 0); +++ if (ret < 0) +++ return ret; +++ +++ /* determine subprog starts. The end is one before the next starts */ +++ for (i = 0; i < insn_cnt; i++) { +++ if (insn[i].code != (BPF_JMP | BPF_CALL)) +++ continue; +++ if (insn[i].src_reg != BPF_PSEUDO_CALL) +++ continue; +++ if (!env->allow_ptr_leaks) { +++ verbose(env, "function calls to other bpf functions are allowed for root only\n"); +++ return -EPERM; +++ } +++ ret = add_subprog(env, i + insn[i].imm + 1); +++ if (ret < 0) +++ return ret; +++ } +++ +++ /* Add a fake 'exit' subprog which could simplify subprog iteration +++ * logic. 'subprog_cnt' should not be increased. +++ */ +++ subprog[env->subprog_cnt].start = insn_cnt; +++ +++ if (env->log.level & BPF_LOG_LEVEL2) +++ for (i = 0; i < env->subprog_cnt; i++) +++ verbose(env, "func#%d @%d\n", i, subprog[i].start); +++ +++ /* now check that all jumps are within the same subprog */ +++ subprog_start = subprog[cur_subprog].start; +++ subprog_end = subprog[cur_subprog + 1].start; +++ for (i = 0; i < insn_cnt; i++) { +++ u8 code = insn[i].code; +++ +++ if (code == (BPF_JMP | BPF_CALL) && +++ insn[i].imm == BPF_FUNC_tail_call && +++ insn[i].src_reg != BPF_PSEUDO_CALL) +++ subprog[cur_subprog].has_tail_call = true; +++ if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) +++ goto next; +++ if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL) +++ goto next; +++ off = i + insn[i].off + 1; +++ if (off < subprog_start || off >= subprog_end) { +++ verbose(env, "jump out of range from insn %d to %d\n", i, off); +++ return -EINVAL; +++ } +++next: +++ if (i == subprog_end - 1) { +++ /* to avoid fall-through from one subprog into another +++ * the last insn of the subprog should be either exit +++ * or unconditional jump back +++ */ +++ if (code != (BPF_JMP | BPF_EXIT) && +++ code != (BPF_JMP | BPF_JA)) { +++ verbose(env, "last insn is not an exit or jmp\n"); +++ return -EINVAL; +++ } +++ subprog_start = subprog_end; +++ cur_subprog++; +++ if (cur_subprog < env->subprog_cnt) +++ subprog_end = subprog[cur_subprog + 1].start; +++ } +++ } +++ return 0; +++} +++ +++/* Parentage chain of this register (or stack slot) should take care of all +++ * issues like callee-saved registers, stack slot allocation time, etc. +++ */ +++static int mark_reg_read(struct bpf_verifier_env *env, +++ const struct bpf_reg_state *state, +++ struct bpf_reg_state *parent, u8 flag) +++{ +++ bool writes = parent == state->parent; /* Observe write marks */ +++ int cnt = 0; +++ +++ while (parent) { +++ /* if read wasn't screened by an earlier write ... */ +++ if (writes && state->live & REG_LIVE_WRITTEN) +++ break; +++ if (parent->live & REG_LIVE_DONE) { +++ verbose(env, "verifier BUG type %s var_off %lld off %d\n", +++ reg_type_str[parent->type], +++ parent->var_off.value, parent->off); +++ return -EFAULT; +++ } +++ /* The first condition is more likely to be true than the +++ * second, checked it first. 
+++ */ +++ if ((parent->live & REG_LIVE_READ) == flag || +++ parent->live & REG_LIVE_READ64) +++ /* The parentage chain never changes and +++ * this parent was already marked as LIVE_READ. +++ * There is no need to keep walking the chain again and +++ * keep re-marking all parents as LIVE_READ. +++ * This case happens when the same register is read +++ * multiple times without writes into it in-between. +++ * Also, if parent has the stronger REG_LIVE_READ64 set, +++ * then no need to set the weak REG_LIVE_READ32. +++ */ +++ break; +++ /* ... then we depend on parent's value */ +++ parent->live |= flag; +++ /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */ +++ if (flag == REG_LIVE_READ64) +++ parent->live &= ~REG_LIVE_READ32; +++ state = parent; +++ parent = state->parent; +++ writes = true; +++ cnt++; +++ } +++ +++ if (env->longest_mark_read_walk < cnt) +++ env->longest_mark_read_walk = cnt; +++ return 0; +++} +++ +++/* This function is supposed to be used by the following 32-bit optimization +++ * code only. It returns TRUE if the source or destination register operates +++ * on 64-bit, otherwise return FALSE. +++ */ +++static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn, +++ u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t) +++{ +++ u8 code, class, op; +++ +++ code = insn->code; +++ class = BPF_CLASS(code); +++ op = BPF_OP(code); +++ if (class == BPF_JMP) { +++ /* BPF_EXIT for "main" will reach here. Return TRUE +++ * conservatively. +++ */ +++ if (op == BPF_EXIT) +++ return true; +++ if (op == BPF_CALL) { +++ /* BPF to BPF call will reach here because of marking +++ * caller saved clobber with DST_OP_NO_MARK for which we +++ * don't care the register def because they are anyway +++ * marked as NOT_INIT already. +++ */ +++ if (insn->src_reg == BPF_PSEUDO_CALL) +++ return false; +++ /* Helper call will reach here because of arg type +++ * check, conservatively return TRUE. +++ */ +++ if (t == SRC_OP) +++ return true; +++ +++ return false; +++ } +++ } +++ +++ if (class == BPF_ALU64 || class == BPF_JMP || +++ /* BPF_END always use BPF_ALU class. */ +++ (class == BPF_ALU && op == BPF_END && insn->imm == 64)) +++ return true; +++ +++ if (class == BPF_ALU || class == BPF_JMP32) +++ return false; +++ +++ if (class == BPF_LDX) { +++ if (t != SRC_OP) +++ return BPF_SIZE(code) == BPF_DW; +++ /* LDX source must be ptr. */ +++ return true; +++ } +++ +++ if (class == BPF_STX) { +++ if (reg->type != SCALAR_VALUE) +++ return true; +++ return BPF_SIZE(code) == BPF_DW; +++ } +++ +++ if (class == BPF_LD) { +++ u8 mode = BPF_MODE(code); +++ +++ /* LD_IMM64 */ +++ if (mode == BPF_IMM) +++ return true; +++ +++ /* Both LD_IND and LD_ABS return 32-bit data. */ +++ if (t != SRC_OP) +++ return false; +++ +++ /* Implicit ctx ptr. */ +++ if (regno == BPF_REG_6) +++ return true; +++ +++ /* Explicit source could be any width. */ +++ return true; +++ } +++ +++ if (class == BPF_ST) +++ /* The only source register for BPF_ST is a ptr. */ +++ return true; +++ +++ /* Conservatively return true at default. */ +++ return true; +++} +++ +++/* Return TRUE if INSN doesn't have explicit value define. */ +++static bool insn_no_def(struct bpf_insn *insn) +++{ +++ u8 class = BPF_CLASS(insn->code); +++ +++ return (class == BPF_JMP || class == BPF_JMP32 || +++ class == BPF_STX || class == BPF_ST); +++} +++ +++/* Return TRUE if INSN has defined any 32-bit value explicitly. 
*/ +++static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn) +++{ +++ if (insn_no_def(insn)) +++ return false; +++ +++ return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP); +++} +++ +++static void mark_insn_zext(struct bpf_verifier_env *env, +++ struct bpf_reg_state *reg) +++{ +++ s32 def_idx = reg->subreg_def; +++ +++ if (def_idx == DEF_NOT_SUBREG) +++ return; +++ +++ env->insn_aux_data[def_idx - 1].zext_dst = true; +++ /* The dst will be zero extended, so won't be sub-register anymore. */ +++ reg->subreg_def = DEF_NOT_SUBREG; +++} +++ +++static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, ++ enum reg_arg_type t) ++ { +++ struct bpf_verifier_state *vstate = env->cur_state; +++ struct bpf_func_state *state = vstate->frame[vstate->curframe]; +++ struct bpf_insn *insn = env->prog->insnsi + env->insn_idx; +++ struct bpf_reg_state *reg, *regs = state->regs; +++ bool rw64; +++ ++ if (regno >= MAX_BPF_REG) { ++- verbose("R%d is invalid\n", regno); +++ verbose(env, "R%d is invalid\n", regno); ++ return -EINVAL; ++ } ++ +++ reg = ®s[regno]; +++ rw64 = is_reg64(env, insn, regno, reg, t); ++ if (t == SRC_OP) { ++ /* check whether register used as source operand can be read */ ++- if (regs[regno].type == NOT_INIT) { ++- verbose("R%d !read_ok\n", regno); +++ if (reg->type == NOT_INIT) { +++ verbose(env, "R%d !read_ok\n", regno); ++ return -EACCES; ++ } +++ /* We don't need to worry about FP liveness because it's read-only */ +++ if (regno == BPF_REG_FP) +++ return 0; +++ +++ if (rw64) +++ mark_insn_zext(env, reg); +++ +++ return mark_reg_read(env, reg, reg->parent, +++ rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32); ++ } else { ++ /* check whether register used as dest operand can be written to */ ++ if (regno == BPF_REG_FP) { ++- verbose("frame pointer is read only\n"); +++ verbose(env, "frame pointer is read only\n"); ++ return -EACCES; ++ } +++ reg->live |= REG_LIVE_WRITTEN; +++ reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1; ++ if (t == DST_OP) ++- mark_reg_unknown_value(regs, regno); +++ mark_reg_unknown(env, regs, regno); ++ } ++ return 0; ++ } ++ ++-static int bpf_size_to_bytes(int bpf_size) +++/* for any branch, call, exit record the history of jmps in the given state */ +++static int push_jmp_history(struct bpf_verifier_env *env, +++ struct bpf_verifier_state *cur) ++ { ++- if (bpf_size == BPF_W) ++- return 4; ++- else if (bpf_size == BPF_H) ++- return 2; ++- else if (bpf_size == BPF_B) ++- return 1; ++- else if (bpf_size == BPF_DW) ++- return 8; ++- else ++- return -EINVAL; +++ u32 cnt = cur->jmp_history_cnt; +++ struct bpf_idx_pair *p; +++ +++ cnt++; +++ p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER); +++ if (!p) +++ return -ENOMEM; +++ p[cnt - 1].idx = env->insn_idx; +++ p[cnt - 1].prev_idx = env->prev_insn_idx; +++ cur->jmp_history = p; +++ cur->jmp_history_cnt = cnt; +++ return 0; +++} +++ +++/* Backtrack one insn at a time. If idx is not at the top of recorded +++ * history then previous instruction came from straight line execution. +++ */ +++static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, +++ u32 *history) +++{ +++ u32 cnt = *history; +++ +++ if (cnt && st->jmp_history[cnt - 1].idx == i) { +++ i = st->jmp_history[cnt - 1].prev_idx; +++ (*history)--; +++ } else { +++ i--; +++ } +++ return i; +++} +++ +++/* For given verifier state backtrack_insn() is called from the last insn to +++ * the first insn. 
Its purpose is to compute a bitmask of registers and +++ * stack slots that needs precision in the parent verifier state. +++ */ +++static int backtrack_insn(struct bpf_verifier_env *env, int idx, +++ u32 *reg_mask, u64 *stack_mask) +++{ +++ const struct bpf_insn_cbs cbs = { +++ .cb_print = verbose, +++ .private_data = env, +++ }; +++ struct bpf_insn *insn = env->prog->insnsi + idx; +++ u8 class = BPF_CLASS(insn->code); +++ u8 opcode = BPF_OP(insn->code); +++ u8 mode = BPF_MODE(insn->code); +++ u32 dreg = 1u << insn->dst_reg; +++ u32 sreg = 1u << insn->src_reg; +++ u32 spi; +++ +++ if (insn->code == 0) +++ return 0; +++ if (env->log.level & BPF_LOG_LEVEL) { +++ verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask); +++ verbose(env, "%d: ", idx); +++ print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); +++ } +++ +++ if (class == BPF_ALU || class == BPF_ALU64) { +++ if (!(*reg_mask & dreg)) +++ return 0; +++ if (opcode == BPF_MOV) { +++ if (BPF_SRC(insn->code) == BPF_X) { +++ /* dreg = sreg +++ * dreg needs precision after this insn +++ * sreg needs precision before this insn +++ */ +++ *reg_mask &= ~dreg; +++ *reg_mask |= sreg; +++ } else { +++ /* dreg = K +++ * dreg needs precision after this insn. +++ * Corresponding register is already marked +++ * as precise=true in this verifier state. +++ * No further markings in parent are necessary +++ */ +++ *reg_mask &= ~dreg; +++ } +++ } else { +++ if (BPF_SRC(insn->code) == BPF_X) { +++ /* dreg += sreg +++ * both dreg and sreg need precision +++ * before this insn +++ */ +++ *reg_mask |= sreg; +++ } /* else dreg += K +++ * dreg still needs precision before this insn +++ */ +++ } +++ } else if (class == BPF_LDX) { +++ if (!(*reg_mask & dreg)) +++ return 0; +++ *reg_mask &= ~dreg; +++ +++ /* scalars can only be spilled into stack w/o losing precision. +++ * Load from any other memory can be zero extended. +++ * The desire to keep that precision is already indicated +++ * by 'precise' mark in corresponding register of this state. +++ * No further tracking necessary. +++ */ +++ if (insn->src_reg != BPF_REG_FP) +++ return 0; +++ if (BPF_SIZE(insn->code) != BPF_DW) +++ return 0; +++ +++ /* dreg = *(u64 *)[fp - off] was a fill from the stack. +++ * that [fp - off] slot contains scalar that needs to be +++ * tracked with precision +++ */ +++ spi = (-insn->off - 1) / BPF_REG_SIZE; +++ if (spi >= 64) { +++ verbose(env, "BUG spi %d\n", spi); +++ WARN_ONCE(1, "verifier backtracking bug"); +++ return -EFAULT; +++ } +++ *stack_mask |= 1ull << spi; +++ } else if (class == BPF_STX || class == BPF_ST) { +++ if (*reg_mask & dreg) +++ /* stx & st shouldn't be using _scalar_ dst_reg +++ * to access memory. It means backtracking +++ * encountered a case of pointer subtraction. 
+++ */ +++ return -ENOTSUPP; +++ /* scalars can only be spilled into stack */ +++ if (insn->dst_reg != BPF_REG_FP) +++ return 0; +++ if (BPF_SIZE(insn->code) != BPF_DW) +++ return 0; +++ spi = (-insn->off - 1) / BPF_REG_SIZE; +++ if (spi >= 64) { +++ verbose(env, "BUG spi %d\n", spi); +++ WARN_ONCE(1, "verifier backtracking bug"); +++ return -EFAULT; +++ } +++ if (!(*stack_mask & (1ull << spi))) +++ return 0; +++ *stack_mask &= ~(1ull << spi); +++ if (class == BPF_STX) +++ *reg_mask |= sreg; +++ } else if (class == BPF_JMP || class == BPF_JMP32) { +++ if (opcode == BPF_CALL) { +++ if (insn->src_reg == BPF_PSEUDO_CALL) +++ return -ENOTSUPP; +++ /* regular helper call sets R0 */ +++ *reg_mask &= ~1; +++ if (*reg_mask & 0x3f) { +++ /* if backtracing was looking for registers R1-R5 +++ * they should have been found already. +++ */ +++ verbose(env, "BUG regs %x\n", *reg_mask); +++ WARN_ONCE(1, "verifier backtracking bug"); +++ return -EFAULT; +++ } +++ } else if (opcode == BPF_EXIT) { +++ return -ENOTSUPP; +++ } +++ } else if (class == BPF_LD) { +++ if (!(*reg_mask & dreg)) +++ return 0; +++ *reg_mask &= ~dreg; +++ /* It's ld_imm64 or ld_abs or ld_ind. +++ * For ld_imm64 no further tracking of precision +++ * into parent is necessary +++ */ +++ if (mode == BPF_IND || mode == BPF_ABS) +++ /* to be analyzed */ +++ return -ENOTSUPP; +++ } +++ return 0; +++} +++ +++/* the scalar precision tracking algorithm: +++ * . at the start all registers have precise=false. +++ * . scalar ranges are tracked as normal through alu and jmp insns. +++ * . once precise value of the scalar register is used in: +++ * . ptr + scalar alu +++ * . if (scalar cond K|scalar) +++ * . helper_call(.., scalar, ...) where ARG_CONST is expected +++ * backtrack through the verifier states and mark all registers and +++ * stack slots with spilled constants that these scalar regisers +++ * should be precise. +++ * . during state pruning two registers (or spilled stack slots) +++ * are equivalent if both are not precise. +++ * +++ * Note the verifier cannot simply walk register parentage chain, +++ * since many different registers and stack slots could have been +++ * used to compute single precise scalar. +++ * +++ * The approach of starting with precise=true for all registers and then +++ * backtrack to mark a register as not precise when the verifier detects +++ * that program doesn't care about specific value (e.g., when helper +++ * takes register as ARG_ANYTHING parameter) is not safe. +++ * +++ * It's ok to walk single parentage chain of the verifier states. +++ * It's possible that this backtracking will go all the way till 1st insn. +++ * All other branches will be explored for needing precision later. +++ * +++ * The backtracking needs to deal with cases like: +++ * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0) +++ * r9 -= r8 +++ * r5 = r9 +++ * if r5 > 0x79f goto pc+7 +++ * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff)) +++ * r5 += 1 +++ * ... +++ * call bpf_perf_event_output#25 +++ * where .arg5_type = ARG_CONST_SIZE_OR_ZERO +++ * +++ * and this case: +++ * r6 = 1 +++ * call foo // uses callee's r6 inside to compute r0 +++ * r0 += r6 +++ * if r0 == 0 goto +++ * +++ * to track above reg_mask/stack_mask needs to be independent for each frame. +++ * +++ * Also if parent's curframe > frame where backtracking started, +++ * the verifier need to mark registers in both frames, otherwise callees +++ * may incorrectly prune callers. 
This is similar to +++ * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences") +++ * +++ * For now backtracking falls back into conservative marking. +++ */ +++static void mark_all_scalars_precise(struct bpf_verifier_env *env, +++ struct bpf_verifier_state *st) +++{ +++ struct bpf_func_state *func; +++ struct bpf_reg_state *reg; +++ int i, j; +++ +++ /* big hammer: mark all scalars precise in this path. +++ * pop_stack may still get !precise scalars. +++ */ +++ for (; st; st = st->parent) +++ for (i = 0; i <= st->curframe; i++) { +++ func = st->frame[i]; +++ for (j = 0; j < BPF_REG_FP; j++) { +++ reg = &func->regs[j]; +++ if (reg->type != SCALAR_VALUE) +++ continue; +++ reg->precise = true; +++ } +++ for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) { +++ if (func->stack[j].slot_type[0] != STACK_SPILL) +++ continue; +++ reg = &func->stack[j].spilled_ptr; +++ if (reg->type != SCALAR_VALUE) +++ continue; +++ reg->precise = true; +++ } +++ } +++} +++ +++static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, +++ int spi) +++{ +++ struct bpf_verifier_state *st = env->cur_state; +++ int first_idx = st->first_insn_idx; +++ int last_idx = env->insn_idx; +++ struct bpf_func_state *func; +++ struct bpf_reg_state *reg; +++ u32 reg_mask = regno >= 0 ? 1u << regno : 0; +++ u64 stack_mask = spi >= 0 ? 1ull << spi : 0; +++ bool skip_first = true; +++ bool new_marks = false; +++ int i, err; +++ +++ if (!env->allow_ptr_leaks) +++ /* backtracking is root only for now */ +++ return 0; +++ +++ func = st->frame[st->curframe]; +++ if (regno >= 0) { +++ reg = &func->regs[regno]; +++ if (reg->type != SCALAR_VALUE) { +++ WARN_ONCE(1, "backtracing misuse"); +++ return -EFAULT; +++ } +++ if (!reg->precise) +++ new_marks = true; +++ else +++ reg_mask = 0; +++ reg->precise = true; +++ } +++ +++ while (spi >= 0) { +++ if (func->stack[spi].slot_type[0] != STACK_SPILL) { +++ stack_mask = 0; +++ break; +++ } +++ reg = &func->stack[spi].spilled_ptr; +++ if (reg->type != SCALAR_VALUE) { +++ stack_mask = 0; +++ break; +++ } +++ if (!reg->precise) +++ new_marks = true; +++ else +++ stack_mask = 0; +++ reg->precise = true; +++ break; +++ } +++ +++ if (!new_marks) +++ return 0; +++ if (!reg_mask && !stack_mask) +++ return 0; +++ for (;;) { +++ DECLARE_BITMAP(mask, 64); +++ u32 history = st->jmp_history_cnt; +++ +++ if (env->log.level & BPF_LOG_LEVEL) +++ verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx); +++ for (i = last_idx;;) { +++ if (skip_first) { +++ err = 0; +++ skip_first = false; +++ } else { +++ err = backtrack_insn(env, i, ®_mask, &stack_mask); +++ } +++ if (err == -ENOTSUPP) { +++ mark_all_scalars_precise(env, st); +++ return 0; +++ } else if (err) { +++ return err; +++ } +++ if (!reg_mask && !stack_mask) +++ /* Found assignment(s) into tracked register in this state. +++ * Since this state is already marked, just return. +++ * Nothing to be tracked further in the parent state. +++ */ +++ return 0; +++ if (i == first_idx) +++ break; +++ i = get_prev_insn_idx(st, i, &history); +++ if (i >= env->prog->len) { +++ /* This can happen if backtracking reached insn 0 +++ * and there are still reg_mask or stack_mask +++ * to backtrack. +++ * It means the backtracking missed the spot where +++ * particular register was initialized with a constant. 
+++ */ +++ verbose(env, "BUG backtracking idx %d\n", i); +++ WARN_ONCE(1, "verifier backtracking bug"); +++ return -EFAULT; +++ } +++ } +++ st = st->parent; +++ if (!st) +++ break; +++ +++ new_marks = false; +++ func = st->frame[st->curframe]; +++ bitmap_from_u64(mask, reg_mask); +++ for_each_set_bit(i, mask, 32) { +++ reg = &func->regs[i]; +++ if (reg->type != SCALAR_VALUE) { +++ reg_mask &= ~(1u << i); +++ continue; +++ } +++ if (!reg->precise) +++ new_marks = true; +++ reg->precise = true; +++ } +++ +++ bitmap_from_u64(mask, stack_mask); +++ for_each_set_bit(i, mask, 64) { +++ if (i >= func->allocated_stack / BPF_REG_SIZE) { +++ /* the sequence of instructions: +++ * 2: (bf) r3 = r10 +++ * 3: (7b) *(u64 *)(r3 -8) = r0 +++ * 4: (79) r4 = *(u64 *)(r10 -8) +++ * doesn't contain jmps. It's backtracked +++ * as a single block. +++ * During backtracking insn 3 is not recognized as +++ * stack access, so at the end of backtracking +++ * stack slot fp-8 is still marked in stack_mask. +++ * However the parent state may not have accessed +++ * fp-8 and it's "unallocated" stack space. +++ * In such case fallback to conservative. +++ */ +++ mark_all_scalars_precise(env, st); +++ return 0; +++ } +++ +++ if (func->stack[i].slot_type[0] != STACK_SPILL) { +++ stack_mask &= ~(1ull << i); +++ continue; +++ } +++ reg = &func->stack[i].spilled_ptr; +++ if (reg->type != SCALAR_VALUE) { +++ stack_mask &= ~(1ull << i); +++ continue; +++ } +++ if (!reg->precise) +++ new_marks = true; +++ reg->precise = true; +++ } +++ if (env->log.level & BPF_LOG_LEVEL) { +++ print_verifier_state(env, func); +++ verbose(env, "parent %s regs=%x stack=%llx marks\n", +++ new_marks ? "didn't have" : "already had", +++ reg_mask, stack_mask); +++ } +++ +++ if (!reg_mask && !stack_mask) +++ break; +++ if (!new_marks) +++ break; +++ +++ last_idx = st->last_insn_idx; +++ first_idx = st->first_insn_idx; +++ } +++ return 0; +++} +++ +++static int mark_chain_precision(struct bpf_verifier_env *env, int regno) +++{ +++ return __mark_chain_precision(env, regno, -1); +++} +++ +++static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi) +++{ +++ return __mark_chain_precision(env, -1, spi); ++ } ++ ++ static bool is_spillable_regtype(enum bpf_reg_type type) ++@@ -549,129 +1842,932 @@ static bool is_spillable_regtype(enum bp ++ case PTR_TO_MAP_VALUE_OR_NULL: ++ case PTR_TO_STACK: ++ case PTR_TO_CTX: ++- case FRAME_PTR: +++ case PTR_TO_PACKET: +++ case PTR_TO_PACKET_META: +++ case PTR_TO_PACKET_END: +++ case PTR_TO_FLOW_KEYS: ++ case CONST_PTR_TO_MAP: +++ case PTR_TO_SOCKET: +++ case PTR_TO_SOCKET_OR_NULL: +++ case PTR_TO_SOCK_COMMON: +++ case PTR_TO_SOCK_COMMON_OR_NULL: +++ case PTR_TO_TCP_SOCK: +++ case PTR_TO_TCP_SOCK_OR_NULL: +++ case PTR_TO_XDP_SOCK: ++ return true; ++ default: ++ return false; ++ } ++ } ++ +++/* Does this register contain a constant zero? 
*/ +++static bool register_is_null(struct bpf_reg_state *reg) +++{ +++ return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0); +++} +++ +++static bool register_is_const(struct bpf_reg_state *reg) +++{ +++ return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off); +++} +++ +++static bool __is_pointer_value(bool allow_ptr_leaks, +++ const struct bpf_reg_state *reg) +++{ +++ if (allow_ptr_leaks) +++ return false; +++ +++ return reg->type != SCALAR_VALUE; +++} +++ +++static void save_register_state(struct bpf_func_state *state, +++ int spi, struct bpf_reg_state *reg) +++{ +++ int i; +++ +++ state->stack[spi].spilled_ptr = *reg; +++ state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; +++ +++ for (i = 0; i < BPF_REG_SIZE; i++) +++ state->stack[spi].slot_type[i] = STACK_SPILL; +++} +++ ++ /* check_stack_read/write functions track spill/fill of registers, ++ * stack boundary and alignment are checked in check_mem_access() ++ */ ++-static int check_stack_write(struct verifier_state *state, int off, int size, ++- int value_regno) +++static int check_stack_write(struct bpf_verifier_env *env, +++ struct bpf_func_state *state, /* func where register points to */ +++ int off, int size, int value_regno, int insn_idx) ++ { ++- int i; +++ struct bpf_func_state *cur; /* state of the current function */ +++ int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; +++ u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg; +++ struct bpf_reg_state *reg = NULL; +++ +++ err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE), +++ state->acquired_refs, true); +++ if (err) +++ return err; ++ /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, ++ * so it's aligned access and [off, off + size) are within stack limits ++ */ +++ if (!env->allow_ptr_leaks && +++ state->stack[spi].slot_type[0] == STACK_SPILL && +++ size != BPF_REG_SIZE) { +++ verbose(env, "attempt to corrupt spilled pointer on stack\n"); +++ return -EACCES; +++ } ++ ++- if (value_regno >= 0 && ++- is_spillable_regtype(state->regs[value_regno].type)) { ++- +++ cur = env->cur_state->frame[env->cur_state->curframe]; +++ if (value_regno >= 0) +++ reg = &cur->regs[value_regno]; +++ +++ if (reg && size == BPF_REG_SIZE && register_is_const(reg) && +++ !register_is_null(reg) && env->allow_ptr_leaks) { +++ if (dst_reg != BPF_REG_FP) { +++ /* The backtracking logic can only recognize explicit +++ * stack slot address like [fp - 8]. Other spill of +++ * scalar via different register has to be conervative. +++ * Backtrack from here and mark all registers as precise +++ * that contributed into 'reg' being a constant. 
+++ */ +++ err = mark_chain_precision(env, value_regno); +++ if (err) +++ return err; +++ } +++ save_register_state(state, spi, reg); +++ } else if (reg && is_spillable_regtype(reg->type)) { ++ /* register containing pointer is being spilled into stack */ ++ if (size != BPF_REG_SIZE) { ++- verbose("invalid size of register spill\n"); +++ verbose_linfo(env, insn_idx, "; "); +++ verbose(env, "invalid size of register spill\n"); ++ return -EACCES; ++ } ++ ++- /* save register state */ ++- state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = ++- state->regs[value_regno]; +++ if (state != cur && reg->type == PTR_TO_STACK) { +++ verbose(env, "cannot spill pointers to stack into stack frame of the caller\n"); +++ return -EINVAL; +++ } ++ ++- for (i = 0; i < BPF_REG_SIZE; i++) ++- state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL; +++ if (!env->allow_ptr_leaks) { +++ bool sanitize = false; +++ +++ if (state->stack[spi].slot_type[0] == STACK_SPILL && +++ register_is_const(&state->stack[spi].spilled_ptr)) +++ sanitize = true; +++ for (i = 0; i < BPF_REG_SIZE; i++) +++ if (state->stack[spi].slot_type[i] == STACK_MISC) { +++ sanitize = true; +++ break; +++ } +++ if (sanitize) { +++ int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; +++ int soff = (-spi - 1) * BPF_REG_SIZE; +++ +++ /* detected reuse of integer stack slot with a pointer +++ * which means either llvm is reusing stack slot or +++ * an attacker is trying to exploit CVE-2018-3639 +++ * (speculative store bypass) +++ * Have to sanitize that slot with preemptive +++ * store of zero. +++ */ +++ if (*poff && *poff != soff) { +++ /* disallow programs where single insn stores +++ * into two different stack slots, since verifier +++ * cannot sanitize them +++ */ +++ verbose(env, +++ "insn %d cannot access two stack slots fp%d and fp%d", +++ insn_idx, *poff, soff); +++ return -EINVAL; +++ } +++ *poff = soff; +++ } +++ } +++ save_register_state(state, spi, reg); ++ } else { ++- /* regular write of data into stack */ ++- state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = ++- (struct reg_state) {}; +++ u8 type = STACK_MISC; +++ +++ /* regular write of data into stack destroys any spilled ptr */ +++ state->stack[spi].spilled_ptr.type = NOT_INIT; +++ /* Mark slots as STACK_MISC if they belonged to spilled ptr. */ +++ if (state->stack[spi].slot_type[0] == STACK_SPILL) +++ for (i = 0; i < BPF_REG_SIZE; i++) +++ state->stack[spi].slot_type[i] = STACK_MISC; +++ +++ /* only mark the slot as written if all 8 bytes were written +++ * otherwise read propagation may incorrectly stop too soon +++ * when stack slots are partially written. +++ * This heuristic means that read propagation will be +++ * conservative, since it will add reg_live_read marks +++ * to stack slots all the way to first state when programs +++ * writes+reads less than 8 bytes +++ */ +++ if (size == BPF_REG_SIZE) +++ state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; +++ +++ /* when we zero initialize stack slots mark them as such */ +++ if (reg && register_is_null(reg)) { +++ /* backtracking doesn't work for STACK_ZERO yet. */ +++ err = mark_chain_precision(env, value_regno); +++ if (err) +++ return err; +++ type = STACK_ZERO; +++ } ++ +++ /* Mark slots affected by this stack write. 
*/ ++ for (i = 0; i < size; i++) ++- state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC; +++ state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = +++ type; ++ } ++ return 0; ++ } ++ ++-static int check_stack_read(struct verifier_state *state, int off, int size, ++- int value_regno) +++static int check_stack_read(struct bpf_verifier_env *env, +++ struct bpf_func_state *reg_state /* func where register points to */, +++ int off, int size, int value_regno) ++ { ++- u8 *slot_type; ++- int i; ++- ++- slot_type = &state->stack_slot_type[MAX_BPF_STACK + off]; +++ struct bpf_verifier_state *vstate = env->cur_state; +++ struct bpf_func_state *state = vstate->frame[vstate->curframe]; +++ int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; +++ struct bpf_reg_state *reg; +++ u8 *stype; +++ +++ if (reg_state->allocated_stack <= slot) { +++ verbose(env, "invalid read from stack off %d+0 size %d\n", +++ off, size); +++ return -EACCES; +++ } +++ stype = reg_state->stack[spi].slot_type; +++ reg = ®_state->stack[spi].spilled_ptr; ++ ++- if (slot_type[0] == STACK_SPILL) { +++ if (stype[0] == STACK_SPILL) { ++ if (size != BPF_REG_SIZE) { ++- verbose("invalid size of register spill\n"); ++- return -EACCES; +++ if (reg->type != SCALAR_VALUE) { +++ verbose_linfo(env, env->insn_idx, "; "); +++ verbose(env, "invalid size of register fill\n"); +++ return -EACCES; +++ } +++ if (value_regno >= 0) { +++ mark_reg_unknown(env, state->regs, value_regno); +++ state->regs[value_regno].live |= REG_LIVE_WRITTEN; +++ } +++ mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); +++ return 0; ++ } ++ for (i = 1; i < BPF_REG_SIZE; i++) { ++- if (slot_type[i] != STACK_SPILL) { ++- verbose("corrupted spill memory\n"); +++ if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) { +++ verbose(env, "corrupted spill memory\n"); ++ return -EACCES; ++ } ++ } ++ ++- if (value_regno >= 0) +++ if (value_regno >= 0) { ++ /* restore register state from stack */ ++- state->regs[value_regno] = ++- state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE]; ++- return 0; +++ state->regs[value_regno] = *reg; +++ /* mark reg as written since spilled pointer state likely +++ * has its liveness marks cleared by is_state_visited() +++ * which resets stack/reg liveness for state transitions +++ */ +++ state->regs[value_regno].live |= REG_LIVE_WRITTEN; +++ } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) { +++ /* If value_regno==-1, the caller is asking us whether +++ * it is acceptable to use this value as a SCALAR_VALUE +++ * (e.g. for XADD). +++ * We must not allow unprivileged callers to do that +++ * with spilled pointers. 
+++ */ +++ verbose(env, "leaking pointer from stack off %d\n", +++ off); +++ return -EACCES; +++ } +++ mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); ++ } else { +++ int zeros = 0; +++ ++ for (i = 0; i < size; i++) { ++- if (slot_type[i] != STACK_MISC) { ++- verbose("invalid read from stack off %d+%d size %d\n", ++- off, i, size); ++- return -EACCES; +++ if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC) +++ continue; +++ if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) { +++ zeros++; +++ continue; ++ } +++ verbose(env, "invalid read from stack off %d+%d size %d\n", +++ off, i, size); +++ return -EACCES; +++ } +++ mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); +++ if (value_regno >= 0) { +++ if (zeros == size) { +++ /* any size read into register is zero extended, +++ * so the whole register == const_zero +++ */ +++ __mark_reg_const_zero(&state->regs[value_regno]); +++ /* backtracking doesn't support STACK_ZERO yet, +++ * so mark it precise here, so that later +++ * backtracking can stop here. +++ * Backtracking may not need this if this register +++ * doesn't participate in pointer adjustment. +++ * Forward propagation of precise flag is not +++ * necessary either. This mark is only to stop +++ * backtracking. Any register that contributed +++ * to const 0 was marked precise before spill. +++ */ +++ state->regs[value_regno].precise = true; +++ } else { +++ /* have read misc data from the stack */ +++ mark_reg_unknown(env, state->regs, value_regno); +++ } +++ state->regs[value_regno].live |= REG_LIVE_WRITTEN; ++ } ++- if (value_regno >= 0) ++- /* have read misc data from the stack */ ++- mark_reg_unknown_value(state->regs, value_regno); ++- return 0; ++ } +++ return 0; +++} +++ +++static int check_stack_access(struct bpf_verifier_env *env, +++ const struct bpf_reg_state *reg, +++ int off, int size) +++{ +++ /* Stack accesses must be at a fixed offset, so that we +++ * can determine what type of data were returned. See +++ * check_stack_read(). 
+++ */ +++ if (!tnum_is_const(reg->var_off)) { +++ char tn_buf[48]; +++ +++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); +++ verbose(env, "variable stack access var_off=%s off=%d size=%d\n", +++ tn_buf, off, size); +++ return -EACCES; +++ } +++ +++ if (off >= 0 || off < -MAX_BPF_STACK) { +++ verbose(env, "invalid stack off=%d size=%d\n", off, size); +++ return -EACCES; +++ } +++ +++ return 0; +++} +++ +++static int check_map_access_type(struct bpf_verifier_env *env, u32 regno, +++ int off, int size, enum bpf_access_type type) +++{ +++ struct bpf_reg_state *regs = cur_regs(env); +++ struct bpf_map *map = regs[regno].map_ptr; +++ u32 cap = bpf_map_flags_to_cap(map); +++ +++ if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) { +++ verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n", +++ map->value_size, off, size); +++ return -EACCES; +++ } +++ +++ if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) { +++ verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n", +++ map->value_size, off, size); +++ return -EACCES; +++ } +++ +++ return 0; ++ } ++ ++ /* check read/write into map element returned by bpf_map_lookup_elem() */ ++-static int check_map_access(struct verifier_env *env, u32 regno, int off, ++- int size) +++static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, +++ int size, bool zero_size_allowed) ++ { ++- struct bpf_map *map = env->cur_state.regs[regno].map_ptr; +++ struct bpf_reg_state *regs = cur_regs(env); +++ struct bpf_map *map = regs[regno].map_ptr; ++ ++- if (off < 0 || off + size > map->value_size) { ++- verbose("invalid access to map value, value_size=%d off=%d size=%d\n", +++ if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) || +++ off + size > map->value_size) { +++ verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n", ++ map->value_size, off, size); ++ return -EACCES; ++ } ++ return 0; ++ } ++ ++-/* check access to 'struct bpf_context' fields */ ++-static int check_ctx_access(struct verifier_env *env, int off, int size, ++- enum bpf_access_type t) +++/* check read/write into a map element with possible variable offset */ +++static int check_map_access(struct bpf_verifier_env *env, u32 regno, +++ int off, int size, bool zero_size_allowed) +++{ +++ struct bpf_verifier_state *vstate = env->cur_state; +++ struct bpf_func_state *state = vstate->frame[vstate->curframe]; +++ struct bpf_reg_state *reg = &state->regs[regno]; +++ int err; +++ +++ /* We may have adjusted the register to this map value, so we +++ * need to try adding each of min_value and max_value to off +++ * to make sure our theoretical access will be safe. +++ */ +++ if (env->log.level & BPF_LOG_LEVEL) +++ print_verifier_state(env, state); +++ +++ /* The minimum value is only important with signed +++ * comparisons where we can't assume the floor of a +++ * value is 0. If we are using signed variables for our +++ * index'es we need to make sure that whatever we use +++ * will have a set floor within our range. 
+++ */ +++ if (reg->smin_value < 0 && +++ (reg->smin_value == S64_MIN || +++ (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) || +++ reg->smin_value + off < 0)) { +++ verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", +++ regno); +++ return -EACCES; +++ } +++ err = __check_map_access(env, regno, reg->smin_value + off, size, +++ zero_size_allowed); +++ if (err) { +++ verbose(env, "R%d min value is outside of the array range\n", +++ regno); +++ return err; +++ } +++ +++ /* If we haven't set a max value then we need to bail since we can't be +++ * sure we won't do bad things. +++ * If reg->umax_value + off could overflow, treat that as unbounded too. +++ */ +++ if (reg->umax_value >= BPF_MAX_VAR_OFF) { +++ verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n", +++ regno); +++ return -EACCES; +++ } +++ err = __check_map_access(env, regno, reg->umax_value + off, size, +++ zero_size_allowed); +++ if (err) +++ verbose(env, "R%d max value is outside of the array range\n", +++ regno); +++ +++ if (map_value_has_spin_lock(reg->map_ptr)) { +++ u32 lock = reg->map_ptr->spin_lock_off; +++ +++ /* if any part of struct bpf_spin_lock can be touched by +++ * load/store reject this program. +++ * To check that [x1, x2) overlaps with [y1, y2) +++ * it is sufficient to check x1 < y2 && y1 < x2. +++ */ +++ if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) && +++ lock < reg->umax_value + off + size) { +++ verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n"); +++ return -EACCES; +++ } +++ } +++ return err; +++} +++ +++#define MAX_PACKET_OFF 0xffff +++ +++static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, +++ const struct bpf_call_arg_meta *meta, +++ enum bpf_access_type t) +++{ +++ switch (env->prog->type) { +++ /* Program types only with direct read access go here! */ +++ case BPF_PROG_TYPE_LWT_IN: +++ case BPF_PROG_TYPE_LWT_OUT: +++ case BPF_PROG_TYPE_LWT_SEG6LOCAL: +++ case BPF_PROG_TYPE_SK_REUSEPORT: +++ case BPF_PROG_TYPE_FLOW_DISSECTOR: +++ case BPF_PROG_TYPE_CGROUP_SKB: +++ if (t == BPF_WRITE) +++ return false; +++ /* fallthrough */ +++ +++ /* Program types with direct read + write access go here! 
*/ +++ case BPF_PROG_TYPE_SCHED_CLS: +++ case BPF_PROG_TYPE_SCHED_ACT: +++ case BPF_PROG_TYPE_XDP: +++ case BPF_PROG_TYPE_LWT_XMIT: +++ case BPF_PROG_TYPE_SK_SKB: +++ case BPF_PROG_TYPE_SK_MSG: +++ if (meta) +++ return meta->pkt_access; +++ +++ env->seen_direct_write = true; +++ return true; +++ +++ case BPF_PROG_TYPE_CGROUP_SOCKOPT: +++ if (t == BPF_WRITE) +++ env->seen_direct_write = true; +++ +++ return true; +++ +++ default: +++ return false; +++ } +++} +++ +++static int __check_packet_access(struct bpf_verifier_env *env, u32 regno, +++ int off, int size, bool zero_size_allowed) +++{ +++ struct bpf_reg_state *regs = cur_regs(env); +++ struct bpf_reg_state *reg = ®s[regno]; +++ +++ if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) || +++ (u64)off + size > reg->range) { +++ verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", +++ off, size, regno, reg->id, reg->off, reg->range); +++ return -EACCES; +++ } +++ return 0; +++} +++ +++static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, +++ int size, bool zero_size_allowed) +++{ +++ struct bpf_reg_state *regs = cur_regs(env); +++ struct bpf_reg_state *reg = ®s[regno]; +++ int err; +++ +++ /* We may have added a variable offset to the packet pointer; but any +++ * reg->range we have comes after that. We are only checking the fixed +++ * offset. +++ */ +++ +++ /* We don't allow negative numbers, because we aren't tracking enough +++ * detail to prove they're safe. +++ */ +++ if (reg->smin_value < 0) { +++ verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", +++ regno); +++ return -EACCES; +++ } +++ err = __check_packet_access(env, regno, off, size, zero_size_allowed); +++ if (err) { +++ verbose(env, "R%d offset is outside of the packet\n", regno); +++ return err; +++ } +++ +++ /* __check_packet_access has made sure "off + size - 1" is within u16. +++ * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff, +++ * otherwise find_good_pkt_pointers would have refused to set range info +++ * that __check_packet_access would have rejected this pkt access. +++ * Therefore, "off + reg->umax_value + size - 1" won't overflow u32. +++ */ +++ env->prog->aux->max_pkt_offset = +++ max_t(u32, env->prog->aux->max_pkt_offset, +++ off + reg->umax_value + size - 1); +++ +++ return err; +++} +++ +++/* check access to 'struct bpf_context' fields. Supports fixed offsets only */ +++static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, +++ enum bpf_access_type t, enum bpf_reg_type *reg_type) ++ { ++- if (env->prog->aux->ops->is_valid_access && ++- env->prog->aux->ops->is_valid_access(off, size, t)) +++ struct bpf_insn_access_aux info = { +++ .reg_type = *reg_type, +++ }; +++ +++ if (env->ops->is_valid_access && +++ env->ops->is_valid_access(off, size, t, env->prog, &info)) { +++ /* A non zero info.ctx_field_size indicates that this field is a +++ * candidate for later verifier transformation to load the whole +++ * field and then apply a mask when accessed with a narrower +++ * access than actual ctx access size. A zero info.ctx_field_size +++ * will only allow for whole field access and rejects any other +++ * type of narrower access. 
+++ */ +++ *reg_type = info.reg_type; +++ +++ env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; +++ /* remember the offset of last byte accessed in ctx */ +++ if (env->prog->aux->max_ctx_offset < off + size) +++ env->prog->aux->max_ctx_offset = off + size; ++ return 0; +++ } ++ ++- verbose("invalid bpf_context access off=%d size=%d\n", off, size); +++ verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size); ++ return -EACCES; ++ } ++ ++-static bool is_pointer_value(struct verifier_env *env, int regno) +++static int check_flow_keys_access(struct bpf_verifier_env *env, int off, +++ int size) ++ { ++- if (env->allow_ptr_leaks) ++- return false; +++ if (size < 0 || off < 0 || +++ (u64)off + size > sizeof(struct bpf_flow_keys)) { +++ verbose(env, "invalid access to flow keys off=%d size=%d\n", +++ off, size); +++ return -EACCES; +++ } +++ return 0; +++} ++ ++- switch (env->cur_state.regs[regno].type) { ++- case UNKNOWN_VALUE: ++- case CONST_IMM: ++- return false; +++static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, +++ u32 regno, int off, int size, +++ enum bpf_access_type t) +++{ +++ struct bpf_reg_state *regs = cur_regs(env); +++ struct bpf_reg_state *reg = ®s[regno]; +++ struct bpf_insn_access_aux info = {}; +++ bool valid; +++ +++ if (reg->smin_value < 0) { +++ verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", +++ regno); +++ return -EACCES; +++ } +++ +++ switch (reg->type) { +++ case PTR_TO_SOCK_COMMON: +++ valid = bpf_sock_common_is_valid_access(off, size, t, &info); +++ break; ++ default: ++- return true; +++ valid = false; +++ } +++ +++ +++ if (valid) { +++ env->insn_aux_data[insn_idx].ctx_field_size = +++ info.ctx_field_size; +++ return 0; +++ } +++ +++ verbose(env, "R%d invalid %s access off=%d size=%d\n", +++ regno, reg_type_str[reg->type], off, size); +++ +++ return -EACCES; +++} +++ +++static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno) +++{ +++ return cur_regs(env) + regno; +++} +++ +++static bool is_pointer_value(struct bpf_verifier_env *env, int regno) +++{ +++ return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno)); +++} +++ +++static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) +++{ +++ const struct bpf_reg_state *reg = reg_state(env, regno); +++ +++ return reg->type == PTR_TO_CTX; +++} +++ +++static bool is_sk_reg(struct bpf_verifier_env *env, int regno) +++{ +++ const struct bpf_reg_state *reg = reg_state(env, regno); +++ +++ return type_is_sk_pointer(reg->type); +++} +++ +++static bool is_pkt_reg(struct bpf_verifier_env *env, int regno) +++{ +++ const struct bpf_reg_state *reg = reg_state(env, regno); +++ +++ return type_is_pkt_pointer(reg->type); +++} +++ +++static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno) +++{ +++ const struct bpf_reg_state *reg = reg_state(env, regno); +++ +++ /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */ +++ return reg->type == PTR_TO_FLOW_KEYS; +++} +++ +++static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, +++ const struct bpf_reg_state *reg, +++ int off, int size, bool strict) +++{ +++ struct tnum reg_off; +++ int ip_align; +++ +++ /* Byte size accesses are always allowed. */ +++ if (!strict || size == 1) +++ return 0; +++ +++ /* For platforms that do not have a Kconfig enabling +++ * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of +++ * NET_IP_ALIGN is universally set to '2'. 
And on platforms +++ * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get +++ * to this code only in strict mode where we want to emulate +++ * the NET_IP_ALIGN==2 checking. Therefore use an +++ * unconditional IP align value of '2'. +++ */ +++ ip_align = 2; +++ +++ reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off)); +++ if (!tnum_is_aligned(reg_off, size)) { +++ char tn_buf[48]; +++ +++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); +++ verbose(env, +++ "misaligned packet access off %d+%s+%d+%d size %d\n", +++ ip_align, tn_buf, reg->off, off, size); +++ return -EACCES; +++ } +++ +++ return 0; +++} +++ +++static int check_generic_ptr_alignment(struct bpf_verifier_env *env, +++ const struct bpf_reg_state *reg, +++ const char *pointer_desc, +++ int off, int size, bool strict) +++{ +++ struct tnum reg_off; +++ +++ /* Byte size accesses are always allowed. */ +++ if (!strict || size == 1) +++ return 0; +++ +++ reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off)); +++ if (!tnum_is_aligned(reg_off, size)) { +++ char tn_buf[48]; +++ +++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); +++ verbose(env, "misaligned %saccess off %s+%d+%d size %d\n", +++ pointer_desc, tn_buf, reg->off, off, size); +++ return -EACCES; +++ } +++ +++ return 0; +++} +++ +++static int check_ptr_alignment(struct bpf_verifier_env *env, +++ const struct bpf_reg_state *reg, int off, +++ int size, bool strict_alignment_once) +++{ +++ bool strict = env->strict_alignment || strict_alignment_once; +++ const char *pointer_desc = ""; +++ +++ switch (reg->type) { +++ case PTR_TO_PACKET: +++ case PTR_TO_PACKET_META: +++ /* Special case, because of NET_IP_ALIGN. Given metadata sits +++ * right in front, treat it the very same way. +++ */ +++ return check_pkt_ptr_alignment(env, reg, off, size, strict); +++ case PTR_TO_FLOW_KEYS: +++ pointer_desc = "flow keys "; +++ break; +++ case PTR_TO_MAP_VALUE: +++ pointer_desc = "value "; +++ break; +++ case PTR_TO_CTX: +++ pointer_desc = "context "; +++ break; +++ case PTR_TO_STACK: +++ pointer_desc = "stack "; +++ /* The stack spill tracking logic in check_stack_write() +++ * and check_stack_read() relies on stack accesses being +++ * aligned. +++ */ +++ strict = true; +++ break; +++ case PTR_TO_SOCKET: +++ pointer_desc = "sock "; +++ break; +++ case PTR_TO_SOCK_COMMON: +++ pointer_desc = "sock_common "; +++ break; +++ case PTR_TO_TCP_SOCK: +++ pointer_desc = "tcp_sock "; +++ break; +++ case PTR_TO_XDP_SOCK: +++ pointer_desc = "xdp_sock "; +++ break; +++ default: +++ break; +++ } +++ return check_generic_ptr_alignment(env, reg, pointer_desc, off, size, +++ strict); +++} +++ +++static int update_stack_depth(struct bpf_verifier_env *env, +++ const struct bpf_func_state *func, +++ int off) +++{ +++ u16 stack = env->subprog_info[func->subprogno].stack_depth; +++ +++ if (stack >= -off) +++ return 0; +++ +++ /* update known max for given subprogram */ +++ env->subprog_info[func->subprogno].stack_depth = -off; +++ return 0; +++} +++ +++/* starting from main bpf function walk all instructions of the function +++ * and recursively walk all callees that given function can call. +++ * Ignore jump and exit insns. 
+++ * Since recursion is prevented by check_cfg() this algorithm +++ * only needs a local stack of MAX_CALL_FRAMES to remember callsites +++ */ +++static int check_max_stack_depth(struct bpf_verifier_env *env) +++{ +++ int depth = 0, frame = 0, idx = 0, i = 0, subprog_end; +++ struct bpf_subprog_info *subprog = env->subprog_info; +++ struct bpf_insn *insn = env->prog->insnsi; +++ int ret_insn[MAX_CALL_FRAMES]; +++ int ret_prog[MAX_CALL_FRAMES]; +++ +++process_func: +++ /* protect against potential stack overflow that might happen when +++ * bpf2bpf calls get combined with tailcalls. Limit the caller's stack +++ * depth for such case down to 256 so that the worst case scenario +++ * would result in 8k stack size (32 which is tailcall limit * 256 = +++ * 8k). +++ * +++ * To get the idea what might happen, see an example: +++ * func1 -> sub rsp, 128 +++ * subfunc1 -> sub rsp, 256 +++ * tailcall1 -> add rsp, 256 +++ * func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320) +++ * subfunc2 -> sub rsp, 64 +++ * subfunc22 -> sub rsp, 128 +++ * tailcall2 -> add rsp, 128 +++ * func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416) +++ * +++ * tailcall will unwind the current stack frame but it will not get rid +++ * of caller's stack as shown on the example above. +++ */ +++ if (idx && subprog[idx].has_tail_call && depth >= 256) { +++ verbose(env, +++ "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n", +++ depth); +++ return -EACCES; +++ } +++ /* round up to 32-bytes, since this is granularity +++ * of interpreter stack size +++ */ +++ depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32); +++ if (depth > MAX_BPF_STACK) { +++ verbose(env, "combined stack size of %d calls is %d. Too large\n", +++ frame + 1, depth); +++ return -EACCES; +++ } +++continue_func: +++ subprog_end = subprog[idx + 1].start; +++ for (; i < subprog_end; i++) { +++ if (insn[i].code != (BPF_JMP | BPF_CALL)) +++ continue; +++ if (insn[i].src_reg != BPF_PSEUDO_CALL) +++ continue; +++ /* remember insn and function to return to */ +++ ret_insn[frame] = i + 1; +++ ret_prog[frame] = idx; +++ +++ /* find the callee */ +++ i = i + insn[i].imm + 1; +++ idx = find_subprog(env, i); +++ if (idx < 0) { +++ WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", +++ i); +++ return -EFAULT; +++ } +++ frame++; +++ if (frame >= MAX_CALL_FRAMES) { +++ verbose(env, "the call stack of %d frames is too deep !\n", +++ frame); +++ return -E2BIG; +++ } +++ goto process_func; +++ } +++ /* end of for() loop means the last insn of the 'subprog' +++ * was reached. Doesn't matter whether it was JA or EXIT +++ */ +++ if (frame == 0) +++ return 0; +++ depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32); +++ frame--; +++ i = ret_insn[frame]; +++ idx = ret_prog[frame]; +++ goto continue_func; +++} +++ +++#ifndef CONFIG_BPF_JIT_ALWAYS_ON +++static int get_callee_stack_depth(struct bpf_verifier_env *env, +++ const struct bpf_insn *insn, int idx) +++{ +++ int start = idx + insn->imm + 1, subprog; +++ +++ subprog = find_subprog(env, start); +++ if (subprog < 0) { +++ WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", +++ start); +++ return -EFAULT; ++ } +++ return env->subprog_info[subprog].stack_depth; +++} +++#endif +++ +++static int check_ctx_reg(struct bpf_verifier_env *env, +++ const struct bpf_reg_state *reg, int regno) +++{ +++ /* Access to ctx or passing it to a helper is only allowed in +++ * its original, unmodified form. 
+++ */ +++ +++ if (reg->off) { +++ verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n", +++ regno, reg->off); +++ return -EACCES; +++ } +++ +++ if (!tnum_is_const(reg->var_off) || reg->var_off.value) { +++ char tn_buf[48]; +++ +++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); +++ verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf); +++ return -EACCES; +++ } +++ +++ return 0; +++} +++ +++static int check_tp_buffer_access(struct bpf_verifier_env *env, +++ const struct bpf_reg_state *reg, +++ int regno, int off, int size) +++{ +++ if (off < 0) { +++ verbose(env, +++ "R%d invalid tracepoint buffer access: off=%d, size=%d", +++ regno, off, size); +++ return -EACCES; +++ } +++ if (!tnum_is_const(reg->var_off) || reg->var_off.value) { +++ char tn_buf[48]; +++ +++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); +++ verbose(env, +++ "R%d invalid variable buffer offset: off=%d, var_off=%s", +++ regno, off, tn_buf); +++ return -EACCES; +++ } +++ if (off + size > env->prog->aux->max_tp_access) +++ env->prog->aux->max_tp_access = off + size; +++ +++ return 0; +++} +++ +++ +++/* truncate register to smaller size (in bytes) +++ * must be called with size < BPF_REG_SIZE +++ */ +++static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) +++{ +++ u64 mask; +++ +++ /* clear high bits in bit representation */ +++ reg->var_off = tnum_cast(reg->var_off, size); +++ +++ /* fix arithmetic bounds */ +++ mask = ((u64)1 << (size * 8)) - 1; +++ if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) { +++ reg->umin_value &= mask; +++ reg->umax_value &= mask; +++ } else { +++ reg->umin_value = 0; +++ reg->umax_value = mask; +++ } +++ reg->smin_value = reg->umin_value; +++ reg->smax_value = reg->umax_value; ++ } ++ ++ /* check whether memory at (regno + off) is accessible for t = (read | write) ++@@ -680,225 +2776,692 @@ static bool is_pointer_value(struct veri ++ * if t==write && value_regno==-1, some unknown value is stored into memory ++ * if t==read && value_regno==-1, don't care what we read from memory ++ */ ++-static int check_mem_access(struct verifier_env *env, u32 regno, int off, ++- int bpf_size, enum bpf_access_type t, ++- int value_regno) +++static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, +++ int off, int bpf_size, enum bpf_access_type t, +++ int value_regno, bool strict_alignment_once) ++ { ++- struct verifier_state *state = &env->cur_state; +++ struct bpf_reg_state *regs = cur_regs(env); +++ struct bpf_reg_state *reg = regs + regno; +++ struct bpf_func_state *state; ++ int size, err = 0; ++ ++- if (state->regs[regno].type == PTR_TO_STACK) ++- off += state->regs[regno].imm; ++- ++ size = bpf_size_to_bytes(bpf_size); ++ if (size < 0) ++ return size; ++ ++- if (off % size != 0) { ++- verbose("misaligned access off %d size %d\n", off, size); ++- return -EACCES; ++- } +++ /* alignment checks will add in reg->off themselves */ +++ err = check_ptr_alignment(env, reg, off, size, strict_alignment_once); +++ if (err) +++ return err; +++ +++ /* for access checks, reg->off is just part of off */ +++ off += reg->off; ++ ++- if (state->regs[regno].type == PTR_TO_MAP_VALUE) { +++ if (reg->type == PTR_TO_MAP_VALUE) { ++ if (t == BPF_WRITE && value_regno >= 0 && ++ is_pointer_value(env, value_regno)) { ++- verbose("R%d leaks addr into map\n", value_regno); +++ verbose(env, "R%d leaks addr into map\n", value_regno); ++ return -EACCES; ++ } ++- err = check_map_access(env, regno, off, size); +++ err = check_map_access_type(env, regno, 
off, size, t); +++ if (err) +++ return err; +++ err = check_map_access(env, regno, off, size, false); ++ if (!err && t == BPF_READ && value_regno >= 0) ++- mark_reg_unknown_value(state->regs, value_regno); +++ mark_reg_unknown(env, regs, value_regno); +++ +++ } else if (reg->type == PTR_TO_CTX) { +++ enum bpf_reg_type reg_type = SCALAR_VALUE; ++ ++- } else if (state->regs[regno].type == PTR_TO_CTX) { ++ if (t == BPF_WRITE && value_regno >= 0 && ++ is_pointer_value(env, value_regno)) { ++- verbose("R%d leaks addr into ctx\n", value_regno); +++ verbose(env, "R%d leaks addr into ctx\n", value_regno); ++ return -EACCES; ++ } ++- err = check_ctx_access(env, off, size, t); ++- if (!err && t == BPF_READ && value_regno >= 0) ++- mark_reg_unknown_value(state->regs, value_regno); ++ ++- } else if (state->regs[regno].type == FRAME_PTR || ++- state->regs[regno].type == PTR_TO_STACK) { ++- if (off >= 0 || off < -MAX_BPF_STACK) { ++- verbose("invalid stack off=%d size=%d\n", off, size); +++ err = check_ctx_reg(env, reg, regno); +++ if (err < 0) +++ return err; +++ +++ err = check_ctx_access(env, insn_idx, off, size, t, ®_type); +++ if (!err && t == BPF_READ && value_regno >= 0) { +++ /* ctx access returns either a scalar, or a +++ * PTR_TO_PACKET[_META,_END]. In the latter +++ * case, we know the offset is zero. +++ */ +++ if (reg_type == SCALAR_VALUE) { +++ mark_reg_unknown(env, regs, value_regno); +++ } else { +++ mark_reg_known_zero(env, regs, +++ value_regno); +++ if (reg_type_may_be_null(reg_type)) +++ regs[value_regno].id = ++env->id_gen; +++ /* A load of ctx field could have different +++ * actual load size with the one encoded in the +++ * insn. When the dst is PTR, it is for sure not +++ * a sub-register. +++ */ +++ regs[value_regno].subreg_def = DEF_NOT_SUBREG; +++ } +++ regs[value_regno].type = reg_type; +++ } +++ +++ } else if (reg->type == PTR_TO_STACK) { +++ off += reg->var_off.value; +++ err = check_stack_access(env, reg, off, size); +++ if (err) +++ return err; +++ +++ state = func(env, reg); +++ err = update_stack_depth(env, state, off); +++ if (err) +++ return err; +++ +++ if (t == BPF_WRITE) +++ err = check_stack_write(env, state, off, size, +++ value_regno, insn_idx); +++ else +++ err = check_stack_read(env, state, off, size, +++ value_regno); +++ } else if (reg_is_pkt_pointer(reg)) { +++ if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) { +++ verbose(env, "cannot write into packet\n"); +++ return -EACCES; +++ } +++ if (t == BPF_WRITE && value_regno >= 0 && +++ is_pointer_value(env, value_regno)) { +++ verbose(env, "R%d leaks addr into packet\n", +++ value_regno); +++ return -EACCES; +++ } +++ err = check_packet_access(env, regno, off, size, false); +++ if (!err && t == BPF_READ && value_regno >= 0) +++ mark_reg_unknown(env, regs, value_regno); +++ } else if (reg->type == PTR_TO_FLOW_KEYS) { +++ if (t == BPF_WRITE && value_regno >= 0 && +++ is_pointer_value(env, value_regno)) { +++ verbose(env, "R%d leaks addr into flow keys\n", +++ value_regno); ++ return -EACCES; ++ } +++ +++ err = check_flow_keys_access(env, off, size); +++ if (!err && t == BPF_READ && value_regno >= 0) +++ mark_reg_unknown(env, regs, value_regno); +++ } else if (type_is_sk_pointer(reg->type)) { ++ if (t == BPF_WRITE) { ++- if (!env->allow_ptr_leaks && ++- state->stack_slot_type[MAX_BPF_STACK + off] == STACK_SPILL && ++- size != BPF_REG_SIZE) { ++- verbose("attempt to corrupt spilled pointer on stack\n"); ++- return -EACCES; ++- } ++- err = check_stack_write(state, off, size, value_regno); ++- } 
else { ++- err = check_stack_read(state, off, size, value_regno); +++ verbose(env, "R%d cannot write into %s\n", +++ regno, reg_type_str[reg->type]); +++ return -EACCES; ++ } +++ err = check_sock_access(env, insn_idx, regno, off, size, t); +++ if (!err && value_regno >= 0) +++ mark_reg_unknown(env, regs, value_regno); +++ } else if (reg->type == PTR_TO_TP_BUFFER) { +++ err = check_tp_buffer_access(env, reg, regno, off, size); +++ if (!err && t == BPF_READ && value_regno >= 0) +++ mark_reg_unknown(env, regs, value_regno); ++ } else { ++- verbose("R%d invalid mem access '%s'\n", ++- regno, reg_type_str[state->regs[regno].type]); +++ verbose(env, "R%d invalid mem access '%s'\n", regno, +++ reg_type_str[reg->type]); ++ return -EACCES; ++ } +++ +++ if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && +++ regs[value_regno].type == SCALAR_VALUE) { +++ /* b/h/w load zero-extends, mark upper bits as known 0 */ +++ coerce_reg_to_size(®s[value_regno], size); +++ } ++ return err; ++ } ++ ++-static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) +++static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn) ++ { ++- struct reg_state *regs = env->cur_state.regs; ++ int err; ++ ++ if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) || ++ insn->imm != 0) { ++- verbose("BPF_XADD uses reserved fields\n"); +++ verbose(env, "BPF_XADD uses reserved fields\n"); ++ return -EINVAL; ++ } ++ ++ /* check src1 operand */ ++- err = check_reg_arg(regs, insn->src_reg, SRC_OP); +++ err = check_reg_arg(env, insn->src_reg, SRC_OP); ++ if (err) ++ return err; ++ ++ /* check src2 operand */ ++- err = check_reg_arg(regs, insn->dst_reg, SRC_OP); +++ err = check_reg_arg(env, insn->dst_reg, SRC_OP); ++ if (err) ++ return err; ++ +++ if (is_pointer_value(env, insn->src_reg)) { +++ verbose(env, "R%d leaks addr into mem\n", insn->src_reg); +++ return -EACCES; +++ } +++ +++ if (is_ctx_reg(env, insn->dst_reg) || +++ is_pkt_reg(env, insn->dst_reg) || +++ is_flow_key_reg(env, insn->dst_reg) || +++ is_sk_reg(env, insn->dst_reg)) { +++ verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", +++ insn->dst_reg, +++ reg_type_str[reg_state(env, insn->dst_reg)->type]); +++ return -EACCES; +++ } +++ ++ /* check whether atomic_add can read the memory */ ++- err = check_mem_access(env, insn->dst_reg, insn->off, ++- BPF_SIZE(insn->code), BPF_READ, -1); +++ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, +++ BPF_SIZE(insn->code), BPF_READ, -1, true); ++ if (err) ++ return err; ++ ++ /* check whether atomic_add can write into the same memory */ ++- return check_mem_access(env, insn->dst_reg, insn->off, ++- BPF_SIZE(insn->code), BPF_WRITE, -1); +++ return check_mem_access(env, insn_idx, insn->dst_reg, insn->off, +++ BPF_SIZE(insn->code), BPF_WRITE, -1, true); +++} +++ +++static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno, +++ int off, int access_size, +++ bool zero_size_allowed) +++{ +++ struct bpf_reg_state *reg = reg_state(env, regno); +++ +++ if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || +++ access_size < 0 || (access_size == 0 && !zero_size_allowed)) { +++ if (tnum_is_const(reg->var_off)) { +++ verbose(env, "invalid stack type R%d off=%d access_size=%d\n", +++ regno, off, access_size); +++ } else { +++ char tn_buf[48]; +++ +++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); +++ verbose(env, "invalid stack type R%d var_off=%s access_size=%d\n", +++ regno, tn_buf, access_size); +++ } +++ return -EACCES; 
+++ } +++ return 0; ++ } ++ ++ /* when register 'regno' is passed into function that will read 'access_size' ++ * bytes from that pointer, make sure that it's within stack boundary ++- * and all elements of stack are initialized +++ * and all elements of stack are initialized. +++ * Unlike most pointer bounds-checking functions, this one doesn't take an +++ * 'off' argument, so it has to add in reg->off itself. ++ */ ++-static int check_stack_boundary(struct verifier_env *env, ++- int regno, int access_size) +++static int check_stack_boundary(struct bpf_verifier_env *env, int regno, +++ int access_size, bool zero_size_allowed, +++ struct bpf_call_arg_meta *meta) ++ { ++- struct verifier_state *state = &env->cur_state; ++- struct reg_state *regs = state->regs; ++- int off, i; +++ struct bpf_reg_state *reg = reg_state(env, regno); +++ struct bpf_func_state *state = func(env, reg); +++ int err, min_off, max_off, i, j, slot, spi; +++ +++ if (reg->type != PTR_TO_STACK) { +++ /* Allow zero-byte read from NULL, regardless of pointer type */ +++ if (zero_size_allowed && access_size == 0 && +++ register_is_null(reg)) +++ return 0; ++ ++- if (regs[regno].type != PTR_TO_STACK) +++ verbose(env, "R%d type=%s expected=%s\n", regno, +++ reg_type_str[reg->type], +++ reg_type_str[PTR_TO_STACK]); ++ return -EACCES; +++ } ++ ++- off = regs[regno].imm; ++- if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || ++- access_size <= 0) { ++- verbose("invalid stack type R%d off=%d access_size=%d\n", ++- regno, off, access_size); +++ if (tnum_is_const(reg->var_off)) { +++ min_off = max_off = reg->var_off.value + reg->off; +++ err = __check_stack_boundary(env, regno, min_off, access_size, +++ zero_size_allowed); +++ if (err) +++ return err; +++ } else { +++ /* Variable offset is prohibited for unprivileged mode for +++ * simplicity since it requires corresponding support in +++ * Spectre masking for stack ALU. +++ * See also retrieve_ptr_limit(). +++ */ +++ if (!env->allow_ptr_leaks) { +++ char tn_buf[48]; +++ +++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); +++ verbose(env, "R%d indirect variable offset stack access prohibited for !root, var_off=%s\n", +++ regno, tn_buf); +++ return -EACCES; +++ } +++ /* Only initialized buffer on stack is allowed to be accessed +++ * with variable offset. With uninitialized buffer it's hard to +++ * guarantee that whole memory is marked as initialized on +++ * helper return since specific bounds are unknown what may +++ * cause uninitialized stack leaking. 
+++ */ +++ if (meta && meta->raw_mode) +++ meta = NULL; +++ +++ if (reg->smax_value >= BPF_MAX_VAR_OFF || +++ reg->smax_value <= -BPF_MAX_VAR_OFF) { +++ verbose(env, "R%d unbounded indirect variable offset stack access\n", +++ regno); +++ return -EACCES; +++ } +++ min_off = reg->smin_value + reg->off; +++ max_off = reg->smax_value + reg->off; +++ err = __check_stack_boundary(env, regno, min_off, access_size, +++ zero_size_allowed); +++ if (err) { +++ verbose(env, "R%d min value is outside of stack bound\n", +++ regno); +++ return err; +++ } +++ err = __check_stack_boundary(env, regno, max_off, access_size, +++ zero_size_allowed); +++ if (err) { +++ verbose(env, "R%d max value is outside of stack bound\n", +++ regno); +++ return err; +++ } +++ } +++ +++ if (meta && meta->raw_mode) { +++ meta->access_size = access_size; +++ meta->regno = regno; +++ return 0; +++ } +++ +++ for (i = min_off; i < max_off + access_size; i++) { +++ u8 *stype; +++ +++ slot = -i - 1; +++ spi = slot / BPF_REG_SIZE; +++ if (state->allocated_stack <= slot) +++ goto err; +++ stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; +++ if (*stype == STACK_MISC) +++ goto mark; +++ if (*stype == STACK_ZERO) { +++ /* helper can write anything into the stack */ +++ *stype = STACK_MISC; +++ goto mark; +++ } +++ if (state->stack[spi].slot_type[0] == STACK_SPILL && +++ state->stack[spi].spilled_ptr.type == SCALAR_VALUE) { +++ __mark_reg_unknown(env, &state->stack[spi].spilled_ptr); +++ for (j = 0; j < BPF_REG_SIZE; j++) +++ state->stack[spi].slot_type[j] = STACK_MISC; +++ goto mark; +++ } +++ +++err: +++ if (tnum_is_const(reg->var_off)) { +++ verbose(env, "invalid indirect read from stack off %d+%d size %d\n", +++ min_off, i - min_off, access_size); +++ } else { +++ char tn_buf[48]; +++ +++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); +++ verbose(env, "invalid indirect read from stack var_off %s+%d size %d\n", +++ tn_buf, i - min_off, access_size); +++ } ++ return -EACCES; +++mark: +++ /* reading any byte out of 8-byte 'spill_slot' will cause +++ * the whole slot to be marked as 'read' +++ */ +++ mark_reg_read(env, &state->stack[spi].spilled_ptr, +++ state->stack[spi].spilled_ptr.parent, +++ REG_LIVE_READ64); ++ } +++ return update_stack_depth(env, state, min_off); +++} +++ +++static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, +++ int access_size, bool zero_size_allowed, +++ struct bpf_call_arg_meta *meta) +++{ +++ struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; ++ ++- for (i = 0; i < access_size; i++) { ++- if (state->stack_slot_type[MAX_BPF_STACK + off + i] != STACK_MISC) { ++- verbose("invalid indirect read from stack off %d+%d size %d\n", ++- off, i, access_size); +++ switch (reg->type) { +++ case PTR_TO_PACKET: +++ case PTR_TO_PACKET_META: +++ return check_packet_access(env, regno, reg->off, access_size, +++ zero_size_allowed); +++ case PTR_TO_MAP_VALUE: +++ if (check_map_access_type(env, regno, reg->off, access_size, +++ meta && meta->raw_mode ? BPF_WRITE : +++ BPF_READ)) ++ return -EACCES; +++ return check_map_access(env, regno, reg->off, access_size, +++ zero_size_allowed); +++ default: /* scalar_value|ptr_to_stack or invalid ptr */ +++ return check_stack_boundary(env, regno, access_size, +++ zero_size_allowed, meta); +++ } +++} +++ +++/* Implementation details: +++ * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL +++ * Two bpf_map_lookups (even with the same key) will have different reg->id. 
+++ * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after +++ * value_or_null->value transition, since the verifier only cares about +++ * the range of access to valid map value pointer and doesn't care about actual +++ * address of the map element. +++ * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps +++ * reg->id > 0 after value_or_null->value transition. By doing so +++ * two bpf_map_lookups will be considered two different pointers that +++ * point to different bpf_spin_locks. +++ * The verifier allows taking only one bpf_spin_lock at a time to avoid +++ * dead-locks. +++ * Since only one bpf_spin_lock is allowed the checks are simpler than +++ * reg_is_refcounted() logic. The verifier needs to remember only +++ * one spin_lock instead of array of acquired_refs. +++ * cur_state->active_spin_lock remembers which map value element got locked +++ * and clears it after bpf_spin_unlock. +++ */ +++static int process_spin_lock(struct bpf_verifier_env *env, int regno, +++ bool is_lock) +++{ +++ struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; +++ struct bpf_verifier_state *cur = env->cur_state; +++ bool is_const = tnum_is_const(reg->var_off); +++ struct bpf_map *map = reg->map_ptr; +++ u64 val = reg->var_off.value; +++ +++ if (reg->type != PTR_TO_MAP_VALUE) { +++ verbose(env, "R%d is not a pointer to map_value\n", regno); +++ return -EINVAL; +++ } +++ if (!is_const) { +++ verbose(env, +++ "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n", +++ regno); +++ return -EINVAL; +++ } +++ if (!map->btf) { +++ verbose(env, +++ "map '%s' has to have BTF in order to use bpf_spin_lock\n", +++ map->name); +++ return -EINVAL; +++ } +++ if (!map_value_has_spin_lock(map)) { +++ if (map->spin_lock_off == -E2BIG) +++ verbose(env, +++ "map '%s' has more than one 'struct bpf_spin_lock'\n", +++ map->name); +++ else if (map->spin_lock_off == -ENOENT) +++ verbose(env, +++ "map '%s' doesn't have 'struct bpf_spin_lock'\n", +++ map->name); +++ else +++ verbose(env, +++ "map '%s' is not a struct type or bpf_spin_lock is mangled\n", +++ map->name); +++ return -EINVAL; +++ } +++ if (map->spin_lock_off != val + reg->off) { +++ verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n", +++ val + reg->off); +++ return -EINVAL; +++ } +++ if (is_lock) { +++ if (cur->active_spin_lock) { +++ verbose(env, +++ "Locking two bpf_spin_locks are not allowed\n"); +++ return -EINVAL; +++ } +++ cur->active_spin_lock = reg->id; +++ } else { +++ if (!cur->active_spin_lock) { +++ verbose(env, "bpf_spin_unlock without taking a lock\n"); +++ return -EINVAL; ++ } +++ if (cur->active_spin_lock != reg->id) { +++ verbose(env, "bpf_spin_unlock of different lock\n"); +++ return -EINVAL; +++ } +++ cur->active_spin_lock = 0; ++ } ++ return 0; ++ } ++ ++-static int check_func_arg(struct verifier_env *env, u32 regno, ++- enum bpf_arg_type arg_type, struct bpf_map **mapp) +++static bool arg_type_is_mem_ptr(enum bpf_arg_type type) +++{ +++ return type == ARG_PTR_TO_MEM || +++ type == ARG_PTR_TO_MEM_OR_NULL || +++ type == ARG_PTR_TO_UNINIT_MEM; +++} +++ +++static bool arg_type_is_mem_size(enum bpf_arg_type type) +++{ +++ return type == ARG_CONST_SIZE || +++ type == ARG_CONST_SIZE_OR_ZERO; +++} +++ +++static bool arg_type_is_int_ptr(enum bpf_arg_type type) +++{ +++ return type == ARG_PTR_TO_INT || +++ type == ARG_PTR_TO_LONG; +++} +++ +++static int int_ptr_type_to_size(enum bpf_arg_type type) +++{ +++ if (type == ARG_PTR_TO_INT) +++ return sizeof(u32); +++ else 
if (type == ARG_PTR_TO_LONG) +++ return sizeof(u64); +++ +++ return -EINVAL; +++} +++ +++static int check_func_arg(struct bpf_verifier_env *env, u32 regno, +++ enum bpf_arg_type arg_type, +++ struct bpf_call_arg_meta *meta) ++ { ++- struct reg_state *reg = env->cur_state.regs + regno; ++- enum bpf_reg_type expected_type; +++ struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; +++ enum bpf_reg_type expected_type, type = reg->type; ++ int err = 0; ++ ++ if (arg_type == ARG_DONTCARE) ++ return 0; ++ ++- if (reg->type == NOT_INIT) { ++- verbose("R%d !read_ok\n", regno); ++- return -EACCES; ++- } +++ err = check_reg_arg(env, regno, SRC_OP); +++ if (err) +++ return err; ++ ++ if (arg_type == ARG_ANYTHING) { ++ if (is_pointer_value(env, regno)) { ++- verbose("R%d leaks addr into helper function\n", regno); +++ verbose(env, "R%d leaks addr into helper function\n", +++ regno); ++ return -EACCES; ++ } ++ return 0; ++ } ++ ++- if (arg_type == ARG_PTR_TO_STACK || arg_type == ARG_PTR_TO_MAP_KEY || ++- arg_type == ARG_PTR_TO_MAP_VALUE) { +++ if (type_is_pkt_pointer(type) && +++ !may_access_direct_pkt_data(env, meta, BPF_READ)) { +++ verbose(env, "helper access to the packet is not allowed\n"); +++ return -EACCES; +++ } +++ +++ if (arg_type == ARG_PTR_TO_MAP_KEY || +++ arg_type == ARG_PTR_TO_MAP_VALUE || +++ arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE || +++ arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) { ++ expected_type = PTR_TO_STACK; ++- } else if (arg_type == ARG_CONST_STACK_SIZE) { ++- expected_type = CONST_IMM; +++ if (register_is_null(reg) && +++ arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) +++ /* final test in check_stack_boundary() */; +++ else if (!type_is_pkt_pointer(type) && +++ type != PTR_TO_MAP_VALUE && +++ type != expected_type) +++ goto err_type; +++ } else if (arg_type == ARG_CONST_SIZE || +++ arg_type == ARG_CONST_SIZE_OR_ZERO) { +++ expected_type = SCALAR_VALUE; +++ if (type != expected_type) +++ goto err_type; ++ } else if (arg_type == ARG_CONST_MAP_PTR) { ++ expected_type = CONST_PTR_TO_MAP; +++ if (type != expected_type) +++ goto err_type; ++ } else if (arg_type == ARG_PTR_TO_CTX) { ++ expected_type = PTR_TO_CTX; +++ if (type != expected_type) +++ goto err_type; +++ err = check_ctx_reg(env, reg, regno); +++ if (err < 0) +++ return err; +++ } else if (arg_type == ARG_PTR_TO_SOCK_COMMON) { +++ expected_type = PTR_TO_SOCK_COMMON; +++ /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */ +++ if (!type_is_sk_pointer(type)) +++ goto err_type; +++ if (reg->ref_obj_id) { +++ if (meta->ref_obj_id) { +++ verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", +++ regno, reg->ref_obj_id, +++ meta->ref_obj_id); +++ return -EFAULT; +++ } +++ meta->ref_obj_id = reg->ref_obj_id; +++ } +++ } else if (arg_type == ARG_PTR_TO_SOCKET) { +++ expected_type = PTR_TO_SOCKET; +++ if (type != expected_type) +++ goto err_type; +++ } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { +++ if (meta->func_id == BPF_FUNC_spin_lock) { +++ if (process_spin_lock(env, regno, true)) +++ return -EACCES; +++ } else if (meta->func_id == BPF_FUNC_spin_unlock) { +++ if (process_spin_lock(env, regno, false)) +++ return -EACCES; +++ } else { +++ verbose(env, "verifier internal error\n"); +++ return -EFAULT; +++ } +++ } else if (arg_type_is_mem_ptr(arg_type)) { +++ expected_type = PTR_TO_STACK; +++ /* One exception here. In case function allows for NULL to be +++ * passed in as argument, it's a SCALAR_VALUE type. Final test +++ * happens during stack boundary checking. 
+++ */ +++ if (register_is_null(reg) && +++ arg_type == ARG_PTR_TO_MEM_OR_NULL) +++ /* final test in check_stack_boundary() */; +++ else if (!type_is_pkt_pointer(type) && +++ type != PTR_TO_MAP_VALUE && +++ type != expected_type) +++ goto err_type; +++ meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM; +++ } else if (arg_type_is_int_ptr(arg_type)) { +++ expected_type = PTR_TO_STACK; +++ if (!type_is_pkt_pointer(type) && +++ type != PTR_TO_MAP_VALUE && +++ type != expected_type) +++ goto err_type; ++ } else { ++- verbose("unsupported arg_type %d\n", arg_type); +++ verbose(env, "unsupported arg_type %d\n", arg_type); ++ return -EFAULT; ++ } ++ ++- if (reg->type != expected_type) { ++- verbose("R%d type=%s expected=%s\n", regno, ++- reg_type_str[reg->type], reg_type_str[expected_type]); ++- return -EACCES; ++- } ++- ++ if (arg_type == ARG_CONST_MAP_PTR) { ++ /* bpf_map_xxx(map_ptr) call: remember that map_ptr */ ++- *mapp = reg->map_ptr; ++- +++ meta->map_ptr = reg->map_ptr; ++ } else if (arg_type == ARG_PTR_TO_MAP_KEY) { ++ /* bpf_map_xxx(..., map_ptr, ..., key) call: ++ * check that [key, key + map->key_size) are within ++ * stack limits and initialized ++ */ ++- if (!*mapp) { +++ if (!meta->map_ptr) { ++ /* in function declaration map_ptr must come before ++ * map_key, so that it's verified and known before ++ * we have to check map_key here. Otherwise it means ++ * that kernel subsystem misconfigured verifier ++ */ ++- verbose("invalid map_ptr to access map->key\n"); +++ verbose(env, "invalid map_ptr to access map->key\n"); ++ return -EACCES; ++ } ++- err = check_stack_boundary(env, regno, (*mapp)->key_size); ++- ++- } else if (arg_type == ARG_PTR_TO_MAP_VALUE) { +++ err = check_helper_mem_access(env, regno, +++ meta->map_ptr->key_size, false, +++ NULL); +++ } else if (arg_type == ARG_PTR_TO_MAP_VALUE || +++ (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL && +++ !register_is_null(reg)) || +++ arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) { ++ /* bpf_map_xxx(..., map_ptr, ..., value) call: ++ * check [value, value + map->value_size) validity ++ */ ++- if (!*mapp) { +++ if (!meta->map_ptr) { ++ /* kernel subsystem misconfigured verifier */ ++- verbose("invalid map_ptr to access map->value\n"); +++ verbose(env, "invalid map_ptr to access map->value\n"); ++ return -EACCES; ++ } ++- err = check_stack_boundary(env, regno, (*mapp)->value_size); +++ meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE); +++ err = check_helper_mem_access(env, regno, +++ meta->map_ptr->value_size, false, +++ meta); +++ } else if (arg_type_is_mem_size(arg_type)) { +++ bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); ++ ++- } else if (arg_type == ARG_CONST_STACK_SIZE) { ++- /* bpf_xxx(..., buf, len) call will access 'len' bytes ++- * from stack pointer 'buf'. Check it ++- * note: regno == len, regno - 1 == buf +++ /* remember the mem_size which may be used later +++ * to refine return values. ++ */ ++- if (regno == 0) { ++- /* kernel subsystem misconfigured verifier */ ++- verbose("ARG_CONST_STACK_SIZE cannot be first argument\n"); +++ meta->msize_max_value = reg->umax_value; +++ +++ /* The register is SCALAR_VALUE; the access check +++ * happens using its boundaries. +++ */ +++ if (!tnum_is_const(reg->var_off)) +++ /* For unprivileged variable accesses, disable raw +++ * mode so that the program is required to +++ * initialize all the memory that the helper could +++ * just partially fill up. 
+++ */ +++ meta = NULL; +++ +++ if (reg->smin_value < 0) { +++ verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n", +++ regno); +++ return -EACCES; +++ } +++ +++ if (reg->umin_value == 0) { +++ err = check_helper_mem_access(env, regno - 1, 0, +++ zero_size_allowed, +++ meta); +++ if (err) +++ return err; +++ } +++ +++ if (reg->umax_value >= BPF_MAX_VAR_SIZ) { +++ verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", +++ regno); ++ return -EACCES; ++ } ++- err = check_stack_boundary(env, regno - 1, reg->imm); +++ err = check_helper_mem_access(env, regno - 1, +++ reg->umax_value, +++ zero_size_allowed, meta); +++ if (!err) +++ err = mark_chain_precision(env, regno); +++ } else if (arg_type_is_int_ptr(arg_type)) { +++ int size = int_ptr_type_to_size(arg_type); +++ +++ err = check_helper_mem_access(env, regno, size, false, meta); +++ if (err) +++ return err; +++ err = check_ptr_alignment(env, reg, 0, size, true); ++ } ++ ++ return err; +++err_type: +++ verbose(env, "R%d type=%s expected=%s\n", regno, +++ reg_type_str[type], reg_type_str[expected_type]); +++ return -EACCES; ++ } ++ ++-static int check_map_func_compatibility(struct bpf_map *map, int func_id) +++static int check_map_func_compatibility(struct bpf_verifier_env *env, +++ struct bpf_map *map, int func_id) ++ { ++ if (!map) ++ return 0; ++@@ -911,7 +3474,74 @@ static int check_map_func_compatibility( ++ break; ++ case BPF_MAP_TYPE_PERF_EVENT_ARRAY: ++ if (func_id != BPF_FUNC_perf_event_read && ++- func_id != BPF_FUNC_perf_event_output) +++ func_id != BPF_FUNC_perf_event_output && +++ func_id != BPF_FUNC_perf_event_read_value) +++ goto error; +++ break; +++ case BPF_MAP_TYPE_STACK_TRACE: +++ if (func_id != BPF_FUNC_get_stackid) +++ goto error; +++ break; +++ case BPF_MAP_TYPE_CGROUP_ARRAY: +++ if (func_id != BPF_FUNC_skb_under_cgroup && +++ func_id != BPF_FUNC_current_task_under_cgroup) +++ goto error; +++ break; +++ case BPF_MAP_TYPE_CGROUP_STORAGE: +++ if (func_id != BPF_FUNC_get_local_storage) +++ goto error; +++ break; +++ case BPF_MAP_TYPE_DEVMAP: +++ case BPF_MAP_TYPE_DEVMAP_HASH: +++ if (func_id != BPF_FUNC_redirect_map && +++ func_id != BPF_FUNC_map_lookup_elem) +++ goto error; +++ break; +++ /* Restrict bpf side of cpumap and xskmap, open when use-cases +++ * appear. 
+++ */ +++ case BPF_MAP_TYPE_CPUMAP: +++ if (func_id != BPF_FUNC_redirect_map) +++ goto error; +++ break; +++ case BPF_MAP_TYPE_XSKMAP: +++ if (func_id != BPF_FUNC_redirect_map && +++ func_id != BPF_FUNC_map_lookup_elem) +++ goto error; +++ break; +++ case BPF_MAP_TYPE_ARRAY_OF_MAPS: +++ case BPF_MAP_TYPE_HASH_OF_MAPS: +++ if (func_id != BPF_FUNC_map_lookup_elem) +++ goto error; +++ break; +++ case BPF_MAP_TYPE_SOCKMAP: +++ if (func_id != BPF_FUNC_sk_redirect_map && +++ func_id != BPF_FUNC_sock_map_update && +++ func_id != BPF_FUNC_map_delete_elem && +++ func_id != BPF_FUNC_msg_redirect_map) +++ goto error; +++ break; +++ case BPF_MAP_TYPE_SOCKHASH: +++ if (func_id != BPF_FUNC_sk_redirect_hash && +++ func_id != BPF_FUNC_sock_hash_update && +++ func_id != BPF_FUNC_map_delete_elem && +++ func_id != BPF_FUNC_msg_redirect_hash) +++ goto error; +++ break; +++ case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: +++ if (func_id != BPF_FUNC_sk_select_reuseport) +++ goto error; +++ break; +++ case BPF_MAP_TYPE_QUEUE: +++ case BPF_MAP_TYPE_STACK: +++ if (func_id != BPF_FUNC_map_peek_elem && +++ func_id != BPF_FUNC_map_pop_elem && +++ func_id != BPF_FUNC_map_push_elem) +++ goto error; +++ break; +++ case BPF_MAP_TYPE_SK_STORAGE: +++ if (func_id != BPF_FUNC_sk_storage_get && +++ func_id != BPF_FUNC_sk_storage_delete) ++ goto error; ++ break; ++ default: ++@@ -923,109 +3553,1579 @@ static int check_map_func_compatibility( ++ case BPF_FUNC_tail_call: ++ if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) ++ goto error; +++ if (env->subprog_cnt > 1) { +++ verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n"); +++ return -EINVAL; +++ } ++ break; ++ case BPF_FUNC_perf_event_read: ++ case BPF_FUNC_perf_event_output: +++ case BPF_FUNC_perf_event_read_value: ++ if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) ++ goto error; ++ break; +++ case BPF_FUNC_get_stackid: +++ if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) +++ goto error; +++ break; +++ case BPF_FUNC_current_task_under_cgroup: +++ case BPF_FUNC_skb_under_cgroup: +++ if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY) +++ goto error; +++ break; +++ case BPF_FUNC_redirect_map: +++ if (map->map_type != BPF_MAP_TYPE_DEVMAP && +++ map->map_type != BPF_MAP_TYPE_DEVMAP_HASH && +++ map->map_type != BPF_MAP_TYPE_CPUMAP && +++ map->map_type != BPF_MAP_TYPE_XSKMAP) +++ goto error; +++ break; +++ case BPF_FUNC_sk_redirect_map: +++ case BPF_FUNC_msg_redirect_map: +++ case BPF_FUNC_sock_map_update: +++ if (map->map_type != BPF_MAP_TYPE_SOCKMAP) +++ goto error; +++ break; +++ case BPF_FUNC_sk_redirect_hash: +++ case BPF_FUNC_msg_redirect_hash: +++ case BPF_FUNC_sock_hash_update: +++ if (map->map_type != BPF_MAP_TYPE_SOCKHASH) +++ goto error; +++ break; +++ case BPF_FUNC_get_local_storage: +++ if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && +++ map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) +++ goto error; +++ break; +++ case BPF_FUNC_sk_select_reuseport: +++ if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) +++ goto error; +++ break; +++ case BPF_FUNC_map_peek_elem: +++ case BPF_FUNC_map_pop_elem: +++ case BPF_FUNC_map_push_elem: +++ if (map->map_type != BPF_MAP_TYPE_QUEUE && +++ map->map_type != BPF_MAP_TYPE_STACK) +++ goto error; +++ break; +++ case BPF_FUNC_sk_storage_get: +++ case BPF_FUNC_sk_storage_delete: +++ if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) +++ goto error; +++ break; ++ default: ++ break; ++ } ++ ++ return 0; ++ error: ++- verbose("cannot pass map_type %d into func %d\n", ++- map->map_type, func_id); +++ verbose(env, "cannot 
pass map_type %d into func %s#%d\n", +++ map->map_type, func_id_name(func_id), func_id); ++ return -EINVAL; ++ } ++ ++-static int check_call(struct verifier_env *env, int func_id) +++static bool check_raw_mode_ok(const struct bpf_func_proto *fn) +++{ +++ int count = 0; +++ +++ if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM) +++ count++; +++ if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM) +++ count++; +++ if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM) +++ count++; +++ if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM) +++ count++; +++ if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM) +++ count++; +++ +++ /* We only support one arg being in raw mode at the moment, +++ * which is sufficient for the helper functions we have +++ * right now. +++ */ +++ return count <= 1; +++} +++ +++static bool check_args_pair_invalid(enum bpf_arg_type arg_curr, +++ enum bpf_arg_type arg_next) +++{ +++ return (arg_type_is_mem_ptr(arg_curr) && +++ !arg_type_is_mem_size(arg_next)) || +++ (!arg_type_is_mem_ptr(arg_curr) && +++ arg_type_is_mem_size(arg_next)); +++} +++ +++static bool check_arg_pair_ok(const struct bpf_func_proto *fn) +++{ +++ /* bpf_xxx(..., buf, len) call will access 'len' +++ * bytes from memory 'buf'. Both arg types need +++ * to be paired, so make sure there's no buggy +++ * helper function specification. +++ */ +++ if (arg_type_is_mem_size(fn->arg1_type) || +++ arg_type_is_mem_ptr(fn->arg5_type) || +++ check_args_pair_invalid(fn->arg1_type, fn->arg2_type) || +++ check_args_pair_invalid(fn->arg2_type, fn->arg3_type) || +++ check_args_pair_invalid(fn->arg3_type, fn->arg4_type) || +++ check_args_pair_invalid(fn->arg4_type, fn->arg5_type)) +++ return false; +++ +++ return true; +++} +++ +++static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id) +++{ +++ int count = 0; +++ +++ if (arg_type_may_be_refcounted(fn->arg1_type)) +++ count++; +++ if (arg_type_may_be_refcounted(fn->arg2_type)) +++ count++; +++ if (arg_type_may_be_refcounted(fn->arg3_type)) +++ count++; +++ if (arg_type_may_be_refcounted(fn->arg4_type)) +++ count++; +++ if (arg_type_may_be_refcounted(fn->arg5_type)) +++ count++; +++ +++ /* A reference acquiring function cannot acquire +++ * another refcounted ptr. +++ */ +++ if (is_acquire_function(func_id) && count) +++ return false; +++ +++ /* We only support one arg being unreferenced at the moment, +++ * which is sufficient for the helper functions we have right now. +++ */ +++ return count <= 1; +++} +++ +++static int check_func_proto(const struct bpf_func_proto *fn, int func_id) +++{ +++ return check_raw_mode_ok(fn) && +++ check_arg_pair_ok(fn) && +++ check_refcount_ok(fn, func_id) ? 0 : -EINVAL; +++} +++ +++/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] +++ * are now invalid, so turn them into unknown SCALAR_VALUE. 
+++ */ +++static void __clear_all_pkt_pointers(struct bpf_verifier_env *env, +++ struct bpf_func_state *state) +++{ +++ struct bpf_reg_state *regs = state->regs, *reg; +++ int i; +++ +++ for (i = 0; i < MAX_BPF_REG; i++) +++ if (reg_is_pkt_pointer_any(®s[i])) +++ mark_reg_unknown(env, regs, i); +++ +++ bpf_for_each_spilled_reg(i, state, reg) { +++ if (!reg) +++ continue; +++ if (reg_is_pkt_pointer_any(reg)) +++ __mark_reg_unknown(env, reg); +++ } +++} +++ +++static void clear_all_pkt_pointers(struct bpf_verifier_env *env) +++{ +++ struct bpf_verifier_state *vstate = env->cur_state; +++ int i; +++ +++ for (i = 0; i <= vstate->curframe; i++) +++ __clear_all_pkt_pointers(env, vstate->frame[i]); +++} +++ +++static void release_reg_references(struct bpf_verifier_env *env, +++ struct bpf_func_state *state, +++ int ref_obj_id) +++{ +++ struct bpf_reg_state *regs = state->regs, *reg; +++ int i; +++ +++ for (i = 0; i < MAX_BPF_REG; i++) +++ if (regs[i].ref_obj_id == ref_obj_id) +++ mark_reg_unknown(env, regs, i); +++ +++ bpf_for_each_spilled_reg(i, state, reg) { +++ if (!reg) +++ continue; +++ if (reg->ref_obj_id == ref_obj_id) +++ __mark_reg_unknown(env, reg); +++ } +++} +++ +++/* The pointer with the specified id has released its reference to kernel +++ * resources. Identify all copies of the same pointer and clear the reference. +++ */ +++static int release_reference(struct bpf_verifier_env *env, +++ int ref_obj_id) +++{ +++ struct bpf_verifier_state *vstate = env->cur_state; +++ int err; +++ int i; +++ +++ err = release_reference_state(cur_func(env), ref_obj_id); +++ if (err) +++ return err; +++ +++ for (i = 0; i <= vstate->curframe; i++) +++ release_reg_references(env, vstate->frame[i], ref_obj_id); +++ +++ return 0; +++} +++ +++static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, +++ int *insn_idx) +++{ +++ struct bpf_verifier_state *state = env->cur_state; +++ struct bpf_func_state *caller, *callee; +++ int i, err, subprog, target_insn; +++ +++ if (state->curframe + 1 >= MAX_CALL_FRAMES) { +++ verbose(env, "the call stack of %d frames is too deep\n", +++ state->curframe + 2); +++ return -E2BIG; +++ } +++ +++ target_insn = *insn_idx + insn->imm; +++ subprog = find_subprog(env, target_insn + 1); +++ if (subprog < 0) { +++ verbose(env, "verifier bug. No program starts at insn %d\n", +++ target_insn + 1); +++ return -EFAULT; +++ } +++ +++ caller = state->frame[state->curframe]; +++ if (state->frame[state->curframe + 1]) { +++ verbose(env, "verifier bug. Frame %d already allocated\n", +++ state->curframe + 1); +++ return -EFAULT; +++ } +++ +++ callee = kzalloc(sizeof(*callee), GFP_KERNEL); +++ if (!callee) +++ return -ENOMEM; +++ state->frame[state->curframe + 1] = callee; +++ +++ /* callee cannot access r0, r6 - r9 for reading and has to write +++ * into its own stack before reading from it. +++ * callee can read/write into caller's stack +++ */ +++ init_func_state(env, callee, +++ /* remember the callsite, it will be used by bpf_exit */ +++ *insn_idx /* callsite */, +++ state->curframe + 1 /* frameno within this callchain */, +++ subprog /* subprog number within this prog */); +++ +++ /* Transfer references to the callee */ +++ err = transfer_reference_state(callee, caller); +++ if (err) +++ return err; +++ +++ /* copy r1 - r5 args that callee can access. 
The copy includes parent +++ * pointers, which connects us up to the liveness chain +++ */ +++ for (i = BPF_REG_1; i <= BPF_REG_5; i++) +++ callee->regs[i] = caller->regs[i]; +++ +++ /* after the call registers r0 - r5 were scratched */ +++ for (i = 0; i < CALLER_SAVED_REGS; i++) { +++ mark_reg_not_init(env, caller->regs, caller_saved[i]); +++ check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); +++ } +++ +++ /* only increment it after check_reg_arg() finished */ +++ state->curframe++; +++ +++ /* and go analyze first insn of the callee */ +++ *insn_idx = target_insn; +++ +++ if (env->log.level & BPF_LOG_LEVEL) { +++ verbose(env, "caller:\n"); +++ print_verifier_state(env, caller); +++ verbose(env, "callee:\n"); +++ print_verifier_state(env, callee); +++ } +++ return 0; +++} +++ +++static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) +++{ +++ struct bpf_verifier_state *state = env->cur_state; +++ struct bpf_func_state *caller, *callee; +++ struct bpf_reg_state *r0; +++ int err; +++ +++ callee = state->frame[state->curframe]; +++ r0 = &callee->regs[BPF_REG_0]; +++ if (r0->type == PTR_TO_STACK) { +++ /* technically it's ok to return caller's stack pointer +++ * (or caller's caller's pointer) back to the caller, +++ * since these pointers are valid. Only current stack +++ * pointer will be invalid as soon as function exits, +++ * but let's be conservative +++ */ +++ verbose(env, "cannot return stack pointer to the caller\n"); +++ return -EINVAL; +++ } +++ +++ state->curframe--; +++ caller = state->frame[state->curframe]; +++ /* return to the caller whatever r0 had in the callee */ +++ caller->regs[BPF_REG_0] = *r0; +++ +++ /* Transfer references to the caller */ +++ err = transfer_reference_state(caller, callee); +++ if (err) +++ return err; +++ +++ *insn_idx = callee->callsite + 1; +++ if (env->log.level & BPF_LOG_LEVEL) { +++ verbose(env, "returning from callee:\n"); +++ print_verifier_state(env, callee); +++ verbose(env, "to caller at %d:\n", *insn_idx); +++ print_verifier_state(env, caller); +++ } +++ /* clear everything in the callee */ +++ free_func_state(callee); +++ state->frame[state->curframe + 1] = NULL; +++ return 0; +++} +++ +++static int do_refine_retval_range(struct bpf_verifier_env *env, +++ struct bpf_reg_state *regs, int ret_type, +++ int func_id, struct bpf_call_arg_meta *meta) +++{ +++ struct bpf_reg_state *ret_reg = ®s[BPF_REG_0]; +++ struct bpf_reg_state tmp_reg = *ret_reg; +++ bool ret; +++ +++ if (ret_type != RET_INTEGER || +++ (func_id != BPF_FUNC_get_stack && +++ func_id != BPF_FUNC_probe_read_str)) +++ return 0; +++ +++ /* Error case where ret is in interval [S32MIN, -1]. */ +++ ret_reg->smin_value = S32_MIN; +++ ret_reg->smax_value = -1; +++ +++ __reg_deduce_bounds(ret_reg); +++ __reg_bound_offset(ret_reg); +++ __update_reg_bounds(ret_reg); +++ +++ ret = push_stack(env, env->insn_idx + 1, env->insn_idx, false); +++ if (!ret) +++ return -EFAULT; +++ +++ *ret_reg = tmp_reg; +++ +++ /* Success case where ret is in range [0, msize_max_value]. 
*/ +++ ret_reg->smin_value = 0; +++ ret_reg->smax_value = meta->msize_max_value; +++ ret_reg->umin_value = ret_reg->smin_value; +++ ret_reg->umax_value = ret_reg->smax_value; +++ +++ __reg_deduce_bounds(ret_reg); +++ __reg_bound_offset(ret_reg); +++ __update_reg_bounds(ret_reg); +++ +++ return 0; +++} +++ +++static int +++record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, +++ int func_id, int insn_idx) +++{ +++ struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; +++ struct bpf_map *map = meta->map_ptr; +++ +++ if (func_id != BPF_FUNC_tail_call && +++ func_id != BPF_FUNC_map_lookup_elem && +++ func_id != BPF_FUNC_map_update_elem && +++ func_id != BPF_FUNC_map_delete_elem && +++ func_id != BPF_FUNC_map_push_elem && +++ func_id != BPF_FUNC_map_pop_elem && +++ func_id != BPF_FUNC_map_peek_elem) +++ return 0; +++ +++ if (map == NULL) { +++ verbose(env, "kernel subsystem misconfigured verifier\n"); +++ return -EINVAL; +++ } +++ +++ /* In case of read-only, some additional restrictions +++ * need to be applied in order to prevent altering the +++ * state of the map from program side. +++ */ +++ if ((map->map_flags & BPF_F_RDONLY_PROG) && +++ (func_id == BPF_FUNC_map_delete_elem || +++ func_id == BPF_FUNC_map_update_elem || +++ func_id == BPF_FUNC_map_push_elem || +++ func_id == BPF_FUNC_map_pop_elem)) { +++ verbose(env, "write into map forbidden\n"); +++ return -EACCES; +++ } +++ +++ if (!BPF_MAP_PTR(aux->map_state)) +++ bpf_map_ptr_store(aux, meta->map_ptr, +++ meta->map_ptr->unpriv_array); +++ else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr) +++ bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON, +++ meta->map_ptr->unpriv_array); +++ return 0; +++} +++ +++static int check_reference_leak(struct bpf_verifier_env *env) +++{ +++ struct bpf_func_state *state = cur_func(env); +++ int i; +++ +++ for (i = 0; i < state->acquired_refs; i++) { +++ verbose(env, "Unreleased reference id=%d alloc_insn=%d\n", +++ state->refs[i].id, state->refs[i].insn_idx); +++ } +++ return state->acquired_refs ? -EINVAL : 0; +++} +++ +++static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx) ++ { ++- struct verifier_state *state = &env->cur_state; ++ const struct bpf_func_proto *fn = NULL; ++- struct reg_state *regs = state->regs; ++- struct bpf_map *map = NULL; ++- struct reg_state *reg; +++ struct bpf_reg_state *regs; +++ struct bpf_call_arg_meta meta; +++ bool changes_data; ++ int i, err; ++ ++ /* find function prototype */ ++ if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) { ++- verbose("invalid func %d\n", func_id); +++ verbose(env, "invalid func %s#%d\n", func_id_name(func_id), +++ func_id); ++ return -EINVAL; ++ } ++ ++- if (env->prog->aux->ops->get_func_proto) ++- fn = env->prog->aux->ops->get_func_proto(func_id); ++- +++ if (env->ops->get_func_proto) +++ fn = env->ops->get_func_proto(func_id, env->prog); ++ if (!fn) { ++- verbose("unknown func %d\n", func_id); +++ verbose(env, "unknown func %s#%d\n", func_id_name(func_id), +++ func_id); ++ return -EINVAL; ++ } ++ ++ /* eBPF programs must be GPL compatible to use GPL-ed functions */ ++ if (!env->prog->gpl_compatible && fn->gpl_only) { ++- verbose("cannot call GPL only function from proprietary program\n"); +++ verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n"); +++ return -EINVAL; +++ } +++ +++ /* With LD_ABS/IND some JITs save/restore skb from r1. 
*/ +++ changes_data = bpf_helper_changes_pkt_data(fn->func); +++ if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) { +++ verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n", +++ func_id_name(func_id), func_id); ++ return -EINVAL; ++ } ++ +++ memset(&meta, 0, sizeof(meta)); +++ meta.pkt_access = fn->pkt_access; +++ +++ err = check_func_proto(fn, func_id); +++ if (err) { +++ verbose(env, "kernel subsystem misconfigured func %s#%d\n", +++ func_id_name(func_id), func_id); +++ return err; +++ } +++ +++ meta.func_id = func_id; ++ /* check args */ ++- err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &map); +++ err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta); ++ if (err) ++ return err; ++- err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &map); +++ err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta); ++ if (err) ++ return err; ++- err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &map); +++ err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta); ++ if (err) ++ return err; ++- err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &map); +++ err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta); ++ if (err) ++ return err; ++- err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &map); +++ err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta); ++ if (err) ++ return err; ++ +++ err = record_func_map(env, &meta, func_id, insn_idx); +++ if (err) +++ return err; +++ +++ /* Mark slots with STACK_MISC in case of raw mode, stack offset +++ * is inferred from register state. +++ */ +++ for (i = 0; i < meta.access_size; i++) { +++ err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, +++ BPF_WRITE, -1, false); +++ if (err) +++ return err; +++ } +++ +++ if (func_id == BPF_FUNC_tail_call) { +++ err = check_reference_leak(env); +++ if (err) { +++ verbose(env, "tail_call would lead to reference leak\n"); +++ return err; +++ } +++ } else if (is_release_function(func_id)) { +++ err = release_reference(env, meta.ref_obj_id); +++ if (err) { +++ verbose(env, "func %s#%d reference has not been acquired before\n", +++ func_id_name(func_id), func_id); +++ return err; +++ } +++ } +++ +++ regs = cur_regs(env); +++ +++ /* check that flags argument in get_local_storage(map, flags) is 0, +++ * this is required because get_local_storage() can't return an error. +++ */ +++ if (func_id == BPF_FUNC_get_local_storage && +++ !register_is_null(®s[BPF_REG_2])) { +++ verbose(env, "get_local_storage() doesn't support non-zero flags\n"); +++ return -EINVAL; +++ } +++ ++ /* reset caller saved regs */ ++ for (i = 0; i < CALLER_SAVED_REGS; i++) { ++- reg = regs + caller_saved[i]; ++- reg->type = NOT_INIT; ++- reg->imm = 0; +++ mark_reg_not_init(env, regs, caller_saved[i]); +++ check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); ++ } ++ ++- /* update return register */ +++ /* helper call returns 64-bit value. 
*/ +++ regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; +++ +++ /* update return register (already marked as written above) */ ++ if (fn->ret_type == RET_INTEGER) { ++- regs[BPF_REG_0].type = UNKNOWN_VALUE; +++ /* sets type to SCALAR_VALUE */ +++ mark_reg_unknown(env, regs, BPF_REG_0); ++ } else if (fn->ret_type == RET_VOID) { ++ regs[BPF_REG_0].type = NOT_INIT; ++- } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) { ++- regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; +++ } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL || +++ fn->ret_type == RET_PTR_TO_MAP_VALUE) { +++ /* There is no offset yet applied, variable or fixed */ +++ mark_reg_known_zero(env, regs, BPF_REG_0); ++ /* remember map_ptr, so that check_map_access() ++ * can check 'value_size' boundary of memory access ++ * to map element returned from bpf_map_lookup_elem() ++ */ ++- if (map == NULL) { ++- verbose("kernel subsystem misconfigured verifier\n"); +++ if (meta.map_ptr == NULL) { +++ verbose(env, +++ "kernel subsystem misconfigured verifier\n"); ++ return -EINVAL; ++ } ++- regs[BPF_REG_0].map_ptr = map; +++ regs[BPF_REG_0].map_ptr = meta.map_ptr; +++ if (fn->ret_type == RET_PTR_TO_MAP_VALUE) { +++ regs[BPF_REG_0].type = PTR_TO_MAP_VALUE; +++ if (map_value_has_spin_lock(meta.map_ptr)) +++ regs[BPF_REG_0].id = ++env->id_gen; +++ } else { +++ regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; +++ regs[BPF_REG_0].id = ++env->id_gen; +++ } +++ } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { +++ mark_reg_known_zero(env, regs, BPF_REG_0); +++ regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; +++ regs[BPF_REG_0].id = ++env->id_gen; +++ } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) { +++ mark_reg_known_zero(env, regs, BPF_REG_0); +++ regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL; +++ regs[BPF_REG_0].id = ++env->id_gen; +++ } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) { +++ mark_reg_known_zero(env, regs, BPF_REG_0); +++ regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL; +++ regs[BPF_REG_0].id = ++env->id_gen; ++ } else { ++- verbose("unknown return type %d of func %d\n", ++- fn->ret_type, func_id); +++ verbose(env, "unknown return type %d of func %s#%d\n", +++ fn->ret_type, func_id_name(func_id), func_id); ++ return -EINVAL; ++ } ++ ++- err = check_map_func_compatibility(map, func_id); +++ if (is_ptr_cast_function(func_id)) { +++ /* For release_reference() */ +++ regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; +++ } else if (is_acquire_function(func_id)) { +++ int id = acquire_reference_state(env, insn_idx); +++ +++ if (id < 0) +++ return id; +++ /* For mark_ptr_or_null_reg() */ +++ regs[BPF_REG_0].id = id; +++ /* For release_reference() */ +++ regs[BPF_REG_0].ref_obj_id = id; +++ } +++ +++ err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta); +++ if (err) +++ return err; +++ +++ err = check_map_func_compatibility(env, meta.map_ptr, func_id); ++ if (err) ++ return err; ++ +++ if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) { +++ const char *err_str; +++ +++#ifdef CONFIG_PERF_EVENTS +++ err = get_callchain_buffers(sysctl_perf_event_max_stack); +++ err_str = "cannot get callchain buffer for func %s#%d\n"; +++#else +++ err = -ENOTSUPP; +++ err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n"; +++#endif +++ if (err) { +++ verbose(env, err_str, func_id_name(func_id), func_id); +++ return err; +++ } +++ +++ env->prog->has_callchain_buf = true; +++ } +++ +++ if (changes_data) +++ clear_all_pkt_pointers(env); ++ return 0; ++ } ++ +++static bool 
signed_add_overflows(s64 a, s64 b) +++{ +++ /* Do the add in u64, where overflow is well-defined */ +++ s64 res = (s64)((u64)a + (u64)b); +++ +++ if (b < 0) +++ return res > a; +++ return res < a; +++} +++ +++static bool signed_sub_overflows(s64 a, s64 b) +++{ +++ /* Do the sub in u64, where overflow is well-defined */ +++ s64 res = (s64)((u64)a - (u64)b); +++ +++ if (b < 0) +++ return res < a; +++ return res > a; +++} +++ +++static bool check_reg_sane_offset(struct bpf_verifier_env *env, +++ const struct bpf_reg_state *reg, +++ enum bpf_reg_type type) +++{ +++ bool known = tnum_is_const(reg->var_off); +++ s64 val = reg->var_off.value; +++ s64 smin = reg->smin_value; +++ +++ if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) { +++ verbose(env, "math between %s pointer and %lld is not allowed\n", +++ reg_type_str[type], val); +++ return false; +++ } +++ +++ if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) { +++ verbose(env, "%s pointer offset %d is not allowed\n", +++ reg_type_str[type], reg->off); +++ return false; +++ } +++ +++ if (smin == S64_MIN) { +++ verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n", +++ reg_type_str[type]); +++ return false; +++ } +++ +++ if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) { +++ verbose(env, "value %lld makes %s pointer be out of bounds\n", +++ smin, reg_type_str[type]); +++ return false; +++ } +++ +++ return true; +++} +++ +++static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) +++{ +++ return &env->insn_aux_data[env->insn_idx]; +++} +++ +++enum { +++ REASON_BOUNDS = -1, +++ REASON_TYPE = -2, +++ REASON_PATHS = -3, +++ REASON_LIMIT = -4, +++ REASON_STACK = -5, +++}; +++ +++static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, +++ u32 *alu_limit, bool mask_to_left) +++{ +++ u32 max = 0, ptr_limit = 0; +++ +++ switch (ptr_reg->type) { +++ case PTR_TO_STACK: +++ /* Offset 0 is out-of-bounds, but acceptable start for the +++ * left direction, see BPF_REG_FP. Also, unknown scalar +++ * offset where we would need to deal with min/max bounds is +++ * currently prohibited for unprivileged. +++ */ +++ max = MAX_BPF_STACK + mask_to_left; +++ ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off); +++ break; +++ case PTR_TO_MAP_VALUE: +++ max = ptr_reg->map_ptr->value_size; +++ ptr_limit = (mask_to_left ? +++ ptr_reg->smin_value : +++ ptr_reg->umax_value) + ptr_reg->off; +++ break; +++ default: +++ return REASON_TYPE; +++ } +++ +++ if (ptr_limit >= max) +++ return REASON_LIMIT; +++ *alu_limit = ptr_limit; +++ return 0; +++} +++ +++static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env, +++ const struct bpf_insn *insn) +++{ +++ return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K; +++} +++ +++static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux, +++ u32 alu_state, u32 alu_limit) +++{ +++ /* If we arrived here from different branches with different +++ * state or limits to sanitize, then this won't work. +++ */ +++ if (aux->alu_state && +++ (aux->alu_state != alu_state || +++ aux->alu_limit != alu_limit)) +++ return REASON_PATHS; +++ +++ /* Corresponding fixup done in fixup_bpf_calls(). 
*/ +++ aux->alu_state = alu_state; +++ aux->alu_limit = alu_limit; +++ return 0; +++} +++ +++static int sanitize_val_alu(struct bpf_verifier_env *env, +++ struct bpf_insn *insn) +++{ +++ struct bpf_insn_aux_data *aux = cur_aux(env); +++ +++ if (can_skip_alu_sanitation(env, insn)) +++ return 0; +++ +++ return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0); +++} +++ +++static bool sanitize_needed(u8 opcode) +++{ +++ return opcode == BPF_ADD || opcode == BPF_SUB; +++} +++ +++struct bpf_sanitize_info { +++ struct bpf_insn_aux_data aux; +++ bool mask_to_left; +++}; +++ +++static struct bpf_verifier_state * +++sanitize_speculative_path(struct bpf_verifier_env *env, +++ const struct bpf_insn *insn, +++ u32 next_idx, u32 curr_idx) +++{ +++ struct bpf_verifier_state *branch; +++ struct bpf_reg_state *regs; +++ +++ branch = push_stack(env, next_idx, curr_idx, true); +++ if (branch && insn) { +++ regs = branch->frame[branch->curframe]->regs; +++ if (BPF_SRC(insn->code) == BPF_K) { +++ mark_reg_unknown(env, regs, insn->dst_reg); +++ } else if (BPF_SRC(insn->code) == BPF_X) { +++ mark_reg_unknown(env, regs, insn->dst_reg); +++ mark_reg_unknown(env, regs, insn->src_reg); +++ } +++ } +++ return branch; +++} +++ +++static int sanitize_ptr_alu(struct bpf_verifier_env *env, +++ struct bpf_insn *insn, +++ const struct bpf_reg_state *ptr_reg, +++ const struct bpf_reg_state *off_reg, +++ struct bpf_reg_state *dst_reg, +++ struct bpf_sanitize_info *info, +++ const bool commit_window) +++{ +++ struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux; +++ struct bpf_verifier_state *vstate = env->cur_state; +++ bool off_is_imm = tnum_is_const(off_reg->var_off); +++ bool off_is_neg = off_reg->smin_value < 0; +++ bool ptr_is_dst_reg = ptr_reg == dst_reg; +++ u8 opcode = BPF_OP(insn->code); +++ u32 alu_state, alu_limit; +++ struct bpf_reg_state tmp; +++ bool ret; +++ int err; +++ +++ if (can_skip_alu_sanitation(env, insn)) +++ return 0; +++ +++ /* We already marked aux for masking from non-speculative +++ * paths, thus we got here in the first place. We only care +++ * to explore bad access from here. +++ */ +++ if (vstate->speculative) +++ goto do_sim; +++ +++ if (!commit_window) { +++ if (!tnum_is_const(off_reg->var_off) && +++ (off_reg->smin_value < 0) != (off_reg->smax_value < 0)) +++ return REASON_BOUNDS; +++ +++ info->mask_to_left = (opcode == BPF_ADD && off_is_neg) || +++ (opcode == BPF_SUB && !off_is_neg); +++ } +++ +++ err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left); +++ if (err < 0) +++ return err; +++ +++ if (commit_window) { +++ /* In commit phase we narrow the masking window based on +++ * the observed pointer move after the simulated operation. +++ */ +++ alu_state = info->aux.alu_state; +++ alu_limit = abs(info->aux.alu_limit - alu_limit); +++ } else { +++ alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; +++ alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0; +++ alu_state |= ptr_is_dst_reg ? +++ BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; +++ } +++ +++ err = update_alu_sanitation_state(aux, alu_state, alu_limit); +++ if (err < 0) +++ return err; +++do_sim: +++ /* If we're in commit phase, we're done here given we already +++ * pushed the truncated dst_reg into the speculative verification +++ * stack. +++ * +++ * Also, when register is a known constant, we rewrite register-based +++ * operation to immediate-based, and thus do not need masking (and as +++ * a consequence, do not need to simulate the zero-truncation either). 
+++ */ +++ if (commit_window || off_is_imm) +++ return 0; +++ +++ /* Simulate and find potential out-of-bounds access under +++ * speculative execution from truncation as a result of +++ * masking when off was not within expected range. If off +++ * sits in dst, then we temporarily need to move ptr there +++ * to simulate dst (== 0) +/-= ptr. Needed, for example, +++ * for cases where we use K-based arithmetic in one direction +++ * and truncated reg-based in the other in order to explore +++ * bad access. +++ */ +++ if (!ptr_is_dst_reg) { +++ tmp = *dst_reg; +++ *dst_reg = *ptr_reg; +++ } +++ ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1, +++ env->insn_idx); +++ if (!ptr_is_dst_reg && ret) +++ *dst_reg = tmp; +++ return !ret ? REASON_STACK : 0; +++} +++ +++static void sanitize_mark_insn_seen(struct bpf_verifier_env *env) +++{ +++ struct bpf_verifier_state *vstate = env->cur_state; +++ +++ /* If we simulate paths under speculation, we don't update the +++ * insn as 'seen' such that when we verify unreachable paths in +++ * the non-speculative domain, sanitize_dead_code() can still +++ * rewrite/sanitize them. +++ */ +++ if (!vstate->speculative) +++ env->insn_aux_data[env->insn_idx].seen = true; +++} +++ +++static int sanitize_err(struct bpf_verifier_env *env, +++ const struct bpf_insn *insn, int reason, +++ const struct bpf_reg_state *off_reg, +++ const struct bpf_reg_state *dst_reg) +++{ +++ static const char *err = "pointer arithmetic with it prohibited for !root"; +++ const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub"; +++ u32 dst = insn->dst_reg, src = insn->src_reg; +++ +++ switch (reason) { +++ case REASON_BOUNDS: +++ verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n", +++ off_reg == dst_reg ? dst : src, err); +++ break; +++ case REASON_TYPE: +++ verbose(env, "R%d has pointer with unsupported alu operation, %s\n", +++ off_reg == dst_reg ? src : dst, err); +++ break; +++ case REASON_PATHS: +++ verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n", +++ dst, op, err); +++ break; +++ case REASON_LIMIT: +++ verbose(env, "R%d tried to %s beyond pointer bounds, %s\n", +++ dst, op, err); +++ break; +++ case REASON_STACK: +++ verbose(env, "R%d could not be pushed for speculative verification, %s\n", +++ dst, err); +++ break; +++ default: +++ verbose(env, "verifier internal error: unknown reason (%d)\n", +++ reason); +++ break; +++ } +++ +++ return -EACCES; +++} +++ +++static int sanitize_check_bounds(struct bpf_verifier_env *env, +++ const struct bpf_insn *insn, +++ const struct bpf_reg_state *dst_reg) +++{ +++ u32 dst = insn->dst_reg; +++ +++ /* For unprivileged we require that resulting offset must be in bounds +++ * in order to be able to sanitize access later on. +++ */ +++ if (env->allow_ptr_leaks) +++ return 0; +++ +++ switch (dst_reg->type) { +++ case PTR_TO_STACK: +++ if (check_stack_access(env, dst_reg, dst_reg->off + +++ dst_reg->var_off.value, 1)) { +++ verbose(env, "R%d stack pointer arithmetic goes out of range, " +++ "prohibited for !root\n", dst); +++ return -EACCES; +++ } +++ break; +++ case PTR_TO_MAP_VALUE: +++ if (check_map_access(env, dst, dst_reg->off, 1, false)) { +++ verbose(env, "R%d pointer arithmetic of map value goes out of range, " +++ "prohibited for !root\n", dst); +++ return -EACCES; +++ } +++ break; +++ default: +++ break; +++ } +++ +++ return 0; +++} +++ +++/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. +++ * Caller should also handle BPF_MOV case separately. 
+++ * If we return -EACCES, caller may want to try again treating pointer as a +++ * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks. +++ */ +++static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, +++ struct bpf_insn *insn, +++ const struct bpf_reg_state *ptr_reg, +++ const struct bpf_reg_state *off_reg) +++{ +++ struct bpf_verifier_state *vstate = env->cur_state; +++ struct bpf_func_state *state = vstate->frame[vstate->curframe]; +++ struct bpf_reg_state *regs = state->regs, *dst_reg; +++ bool known = tnum_is_const(off_reg->var_off); +++ s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value, +++ smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; +++ u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, +++ umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; +++ struct bpf_sanitize_info info = {}; +++ u8 opcode = BPF_OP(insn->code); +++ u32 dst = insn->dst_reg; +++ int ret; +++ +++ dst_reg = ®s[dst]; +++ +++ if ((known && (smin_val != smax_val || umin_val != umax_val)) || +++ smin_val > smax_val || umin_val > umax_val) { +++ /* Taint dst register if offset had invalid bounds derived from +++ * e.g. dead branches. +++ */ +++ __mark_reg_unknown(env, dst_reg); +++ return 0; +++ } +++ +++ if (BPF_CLASS(insn->code) != BPF_ALU64) { +++ /* 32-bit ALU ops on pointers produce (meaningless) scalars */ +++ verbose(env, +++ "R%d 32-bit pointer arithmetic prohibited\n", +++ dst); +++ return -EACCES; +++ } +++ +++ switch (ptr_reg->type) { +++ case PTR_TO_MAP_VALUE_OR_NULL: +++ verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n", +++ dst, reg_type_str[ptr_reg->type]); +++ return -EACCES; +++ case CONST_PTR_TO_MAP: +++ /* smin_val represents the known value */ +++ if (known && smin_val == 0 && opcode == BPF_ADD) +++ break; +++ /* fall-through */ +++ case PTR_TO_PACKET_END: +++ case PTR_TO_SOCKET: +++ case PTR_TO_SOCKET_OR_NULL: +++ case PTR_TO_SOCK_COMMON: +++ case PTR_TO_SOCK_COMMON_OR_NULL: +++ case PTR_TO_TCP_SOCK: +++ case PTR_TO_TCP_SOCK_OR_NULL: +++ case PTR_TO_XDP_SOCK: +++ verbose(env, "R%d pointer arithmetic on %s prohibited\n", +++ dst, reg_type_str[ptr_reg->type]); +++ return -EACCES; +++ default: +++ break; +++ } +++ +++ /* In case of 'scalar += pointer', dst_reg inherits pointer type and id. +++ * The id may be overwritten later if we create a new variable offset. +++ */ +++ dst_reg->type = ptr_reg->type; +++ dst_reg->id = ptr_reg->id; +++ +++ if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) || +++ !check_reg_sane_offset(env, ptr_reg, ptr_reg->type)) +++ return -EINVAL; +++ +++ if (sanitize_needed(opcode)) { +++ ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg, +++ &info, false); +++ if (ret < 0) +++ return sanitize_err(env, insn, ret, off_reg, dst_reg); +++ } +++ +++ switch (opcode) { +++ case BPF_ADD: +++ /* We can take a fixed offset as long as it doesn't overflow +++ * the s32 'off' field +++ */ +++ if (known && (ptr_reg->off + smin_val == +++ (s64)(s32)(ptr_reg->off + smin_val))) { +++ /* pointer += K. Accumulate it into fixed offset */ +++ dst_reg->smin_value = smin_ptr; +++ dst_reg->smax_value = smax_ptr; +++ dst_reg->umin_value = umin_ptr; +++ dst_reg->umax_value = umax_ptr; +++ dst_reg->var_off = ptr_reg->var_off; +++ dst_reg->off = ptr_reg->off + smin_val; +++ dst_reg->raw = ptr_reg->raw; +++ break; +++ } +++ /* A new variable offset is created. Note that off_reg->off +++ * == 0, since it's a scalar. 
+++ * dst_reg gets the pointer type and since some positive +++ * integer value was added to the pointer, give it a new 'id' +++ * if it's a PTR_TO_PACKET. +++ * this creates a new 'base' pointer, off_reg (variable) gets +++ * added into the variable offset, and we copy the fixed offset +++ * from ptr_reg. +++ */ +++ if (signed_add_overflows(smin_ptr, smin_val) || +++ signed_add_overflows(smax_ptr, smax_val)) { +++ dst_reg->smin_value = S64_MIN; +++ dst_reg->smax_value = S64_MAX; +++ } else { +++ dst_reg->smin_value = smin_ptr + smin_val; +++ dst_reg->smax_value = smax_ptr + smax_val; +++ } +++ if (umin_ptr + umin_val < umin_ptr || +++ umax_ptr + umax_val < umax_ptr) { +++ dst_reg->umin_value = 0; +++ dst_reg->umax_value = U64_MAX; +++ } else { +++ dst_reg->umin_value = umin_ptr + umin_val; +++ dst_reg->umax_value = umax_ptr + umax_val; +++ } +++ dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off); +++ dst_reg->off = ptr_reg->off; +++ dst_reg->raw = ptr_reg->raw; +++ if (reg_is_pkt_pointer(ptr_reg)) { +++ dst_reg->id = ++env->id_gen; +++ /* something was added to pkt_ptr, set range to zero */ +++ dst_reg->raw = 0; +++ } +++ break; +++ case BPF_SUB: +++ if (dst_reg == off_reg) { +++ /* scalar -= pointer. Creates an unknown scalar */ +++ verbose(env, "R%d tried to subtract pointer from scalar\n", +++ dst); +++ return -EACCES; +++ } +++ /* We don't allow subtraction from FP, because (according to +++ * test_verifier.c test "invalid fp arithmetic", JITs might not +++ * be able to deal with it. +++ */ +++ if (ptr_reg->type == PTR_TO_STACK) { +++ verbose(env, "R%d subtraction from stack pointer prohibited\n", +++ dst); +++ return -EACCES; +++ } +++ if (known && (ptr_reg->off - smin_val == +++ (s64)(s32)(ptr_reg->off - smin_val))) { +++ /* pointer -= K. Subtract it from fixed offset */ +++ dst_reg->smin_value = smin_ptr; +++ dst_reg->smax_value = smax_ptr; +++ dst_reg->umin_value = umin_ptr; +++ dst_reg->umax_value = umax_ptr; +++ dst_reg->var_off = ptr_reg->var_off; +++ dst_reg->id = ptr_reg->id; +++ dst_reg->off = ptr_reg->off - smin_val; +++ dst_reg->raw = ptr_reg->raw; +++ break; +++ } +++ /* A new variable offset is created. If the subtrahend is known +++ * nonnegative, then any reg->range we had before is still good. +++ */ +++ if (signed_sub_overflows(smin_ptr, smax_val) || +++ signed_sub_overflows(smax_ptr, smin_val)) { +++ /* Overflow possible, we know nothing */ +++ dst_reg->smin_value = S64_MIN; +++ dst_reg->smax_value = S64_MAX; +++ } else { +++ dst_reg->smin_value = smin_ptr - smax_val; +++ dst_reg->smax_value = smax_ptr - smin_val; +++ } +++ if (umin_ptr < umax_val) { +++ /* Overflow possible, we know nothing */ +++ dst_reg->umin_value = 0; +++ dst_reg->umax_value = U64_MAX; +++ } else { +++ /* Cannot overflow (as long as bounds are consistent) */ +++ dst_reg->umin_value = umin_ptr - umax_val; +++ dst_reg->umax_value = umax_ptr - umin_val; +++ } +++ dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off); +++ dst_reg->off = ptr_reg->off; +++ dst_reg->raw = ptr_reg->raw; +++ if (reg_is_pkt_pointer(ptr_reg)) { +++ dst_reg->id = ++env->id_gen; +++ /* something was added to pkt_ptr, set range to zero */ +++ if (smin_val < 0) +++ dst_reg->raw = 0; +++ } +++ break; +++ case BPF_AND: +++ case BPF_OR: +++ case BPF_XOR: +++ /* bitwise ops on pointers are troublesome, prohibit. */ +++ verbose(env, "R%d bitwise operator %s on pointer prohibited\n", +++ dst, bpf_alu_string[opcode >> 4]); +++ return -EACCES; +++ default: +++ /* other operators (e.g. 
MUL,LSH) produce non-pointer results */ +++ verbose(env, "R%d pointer arithmetic with %s operator prohibited\n", +++ dst, bpf_alu_string[opcode >> 4]); +++ return -EACCES; +++ } +++ +++ if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type)) +++ return -EINVAL; +++ +++ __update_reg_bounds(dst_reg); +++ __reg_deduce_bounds(dst_reg); +++ __reg_bound_offset(dst_reg); +++ +++ if (sanitize_check_bounds(env, insn, dst_reg) < 0) +++ return -EACCES; +++ if (sanitize_needed(opcode)) { +++ ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg, +++ &info, true); +++ if (ret < 0) +++ return sanitize_err(env, insn, ret, off_reg, dst_reg); +++ } +++ +++ return 0; +++} +++ +++/* WARNING: This function does calculations on 64-bit values, but the actual +++ * execution may occur on 32-bit values. Therefore, things like bitshifts +++ * need extra checks in the 32-bit case. +++ */ +++static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, +++ struct bpf_insn *insn, +++ struct bpf_reg_state *dst_reg, +++ struct bpf_reg_state src_reg) +++{ +++ struct bpf_reg_state *regs = cur_regs(env); +++ u8 opcode = BPF_OP(insn->code); +++ bool src_known, dst_known; +++ s64 smin_val, smax_val; +++ u64 umin_val, umax_val; +++ u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; +++ int ret; +++ +++ if (insn_bitness == 32) { +++ /* Relevant for 32-bit RSH: Information can propagate towards +++ * LSB, so it isn't sufficient to only truncate the output to +++ * 32 bits. +++ */ +++ coerce_reg_to_size(dst_reg, 4); +++ coerce_reg_to_size(&src_reg, 4); +++ } +++ +++ smin_val = src_reg.smin_value; +++ smax_val = src_reg.smax_value; +++ umin_val = src_reg.umin_value; +++ umax_val = src_reg.umax_value; +++ src_known = tnum_is_const(src_reg.var_off); +++ dst_known = tnum_is_const(dst_reg->var_off); +++ +++ if ((src_known && (smin_val != smax_val || umin_val != umax_val)) || +++ smin_val > smax_val || umin_val > umax_val) { +++ /* Taint dst register if offset had invalid bounds derived from +++ * e.g. dead branches. 
+++ */ +++ __mark_reg_unknown(env, dst_reg); +++ return 0; +++ } +++ +++ if (!src_known && +++ opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) { +++ __mark_reg_unknown(env, dst_reg); +++ return 0; +++ } +++ +++ if (sanitize_needed(opcode)) { +++ ret = sanitize_val_alu(env, insn); +++ if (ret < 0) +++ return sanitize_err(env, insn, ret, NULL, NULL); +++ } +++ +++ switch (opcode) { +++ case BPF_ADD: +++ if (signed_add_overflows(dst_reg->smin_value, smin_val) || +++ signed_add_overflows(dst_reg->smax_value, smax_val)) { +++ dst_reg->smin_value = S64_MIN; +++ dst_reg->smax_value = S64_MAX; +++ } else { +++ dst_reg->smin_value += smin_val; +++ dst_reg->smax_value += smax_val; +++ } +++ if (dst_reg->umin_value + umin_val < umin_val || +++ dst_reg->umax_value + umax_val < umax_val) { +++ dst_reg->umin_value = 0; +++ dst_reg->umax_value = U64_MAX; +++ } else { +++ dst_reg->umin_value += umin_val; +++ dst_reg->umax_value += umax_val; +++ } +++ dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off); +++ break; +++ case BPF_SUB: +++ if (signed_sub_overflows(dst_reg->smin_value, smax_val) || +++ signed_sub_overflows(dst_reg->smax_value, smin_val)) { +++ /* Overflow possible, we know nothing */ +++ dst_reg->smin_value = S64_MIN; +++ dst_reg->smax_value = S64_MAX; +++ } else { +++ dst_reg->smin_value -= smax_val; +++ dst_reg->smax_value -= smin_val; +++ } +++ if (dst_reg->umin_value < umax_val) { +++ /* Overflow possible, we know nothing */ +++ dst_reg->umin_value = 0; +++ dst_reg->umax_value = U64_MAX; +++ } else { +++ /* Cannot overflow (as long as bounds are consistent) */ +++ dst_reg->umin_value -= umax_val; +++ dst_reg->umax_value -= umin_val; +++ } +++ dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off); +++ break; +++ case BPF_MUL: +++ dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off); +++ if (smin_val < 0 || dst_reg->smin_value < 0) { +++ /* Ain't nobody got time to multiply that sign */ +++ __mark_reg_unbounded(dst_reg); +++ __update_reg_bounds(dst_reg); +++ break; +++ } +++ /* Both values are positive, so we can work with unsigned and +++ * copy the result to signed (unless it exceeds S64_MAX). +++ */ +++ if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) { +++ /* Potential overflow, we know nothing */ +++ __mark_reg_unbounded(dst_reg); +++ /* (except what we can learn from the var_off) */ +++ __update_reg_bounds(dst_reg); +++ break; +++ } +++ dst_reg->umin_value *= umin_val; +++ dst_reg->umax_value *= umax_val; +++ if (dst_reg->umax_value > S64_MAX) { +++ /* Overflow possible, we know nothing */ +++ dst_reg->smin_value = S64_MIN; +++ dst_reg->smax_value = S64_MAX; +++ } else { +++ dst_reg->smin_value = dst_reg->umin_value; +++ dst_reg->smax_value = dst_reg->umax_value; +++ } +++ break; +++ case BPF_AND: +++ if (src_known && dst_known) { +++ __mark_reg_known(dst_reg, dst_reg->var_off.value & +++ src_reg.var_off.value); +++ break; +++ } +++ /* We get our minimum from the var_off, since that's inherently +++ * bitwise. Our maximum is the minimum of the operands' maxima. +++ */ +++ dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off); +++ dst_reg->umin_value = dst_reg->var_off.value; +++ dst_reg->umax_value = min(dst_reg->umax_value, umax_val); +++ if (dst_reg->smin_value < 0 || smin_val < 0) { +++ /* Lose signed bounds when ANDing negative numbers, +++ * ain't nobody got time for that. 
+++ */ +++ dst_reg->smin_value = S64_MIN; +++ dst_reg->smax_value = S64_MAX; +++ } else { +++ /* ANDing two positives gives a positive, so safe to +++ * cast result into s64. +++ */ +++ dst_reg->smin_value = dst_reg->umin_value; +++ dst_reg->smax_value = dst_reg->umax_value; +++ } +++ /* We may learn something more from the var_off */ +++ __update_reg_bounds(dst_reg); +++ break; +++ case BPF_OR: +++ if (src_known && dst_known) { +++ __mark_reg_known(dst_reg, dst_reg->var_off.value | +++ src_reg.var_off.value); +++ break; +++ } +++ /* We get our maximum from the var_off, and our minimum is the +++ * maximum of the operands' minima +++ */ +++ dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off); +++ dst_reg->umin_value = max(dst_reg->umin_value, umin_val); +++ dst_reg->umax_value = dst_reg->var_off.value | +++ dst_reg->var_off.mask; +++ if (dst_reg->smin_value < 0 || smin_val < 0) { +++ /* Lose signed bounds when ORing negative numbers, +++ * ain't nobody got time for that. +++ */ +++ dst_reg->smin_value = S64_MIN; +++ dst_reg->smax_value = S64_MAX; +++ } else { +++ /* ORing two positives gives a positive, so safe to +++ * cast result into s64. +++ */ +++ dst_reg->smin_value = dst_reg->umin_value; +++ dst_reg->smax_value = dst_reg->umax_value; +++ } +++ /* We may learn something more from the var_off */ +++ __update_reg_bounds(dst_reg); +++ break; +++ case BPF_LSH: +++ if (umax_val >= insn_bitness) { +++ /* Shifts greater than 31 or 63 are undefined. +++ * This includes shifts by a negative number. +++ */ +++ mark_reg_unknown(env, regs, insn->dst_reg); +++ break; +++ } +++ /* We lose all sign bit information (except what we can pick +++ * up from var_off) +++ */ +++ dst_reg->smin_value = S64_MIN; +++ dst_reg->smax_value = S64_MAX; +++ /* If we might shift our top bit out, then we know nothing */ +++ if (dst_reg->umax_value > 1ULL << (63 - umax_val)) { +++ dst_reg->umin_value = 0; +++ dst_reg->umax_value = U64_MAX; +++ } else { +++ dst_reg->umin_value <<= umin_val; +++ dst_reg->umax_value <<= umax_val; +++ } +++ dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val); +++ /* We may learn something more from the var_off */ +++ __update_reg_bounds(dst_reg); +++ break; +++ case BPF_RSH: +++ if (umax_val >= insn_bitness) { +++ /* Shifts greater than 31 or 63 are undefined. +++ * This includes shifts by a negative number. +++ */ +++ mark_reg_unknown(env, regs, insn->dst_reg); +++ break; +++ } +++ /* BPF_RSH is an unsigned shift. If the value in dst_reg might +++ * be negative, then either: +++ * 1) src_reg might be zero, so the sign bit of the result is +++ * unknown, so we lose our signed bounds +++ * 2) it's known negative, thus the unsigned bounds capture the +++ * signed bounds +++ * 3) the signed bounds cross zero, so they tell us nothing +++ * about the result +++ * If the value in dst_reg is known nonnegative, then again the +++ * unsigned bounts capture the signed bounds. +++ * Thus, in all cases it suffices to blow away our signed bounds +++ * and rely on inferring new ones from the unsigned bounds and +++ * var_off of the result. +++ */ +++ dst_reg->smin_value = S64_MIN; +++ dst_reg->smax_value = S64_MAX; +++ dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val); +++ dst_reg->umin_value >>= umax_val; +++ dst_reg->umax_value >>= umin_val; +++ /* We may learn something more from the var_off */ +++ __update_reg_bounds(dst_reg); +++ break; +++ case BPF_ARSH: +++ if (umax_val >= insn_bitness) { +++ /* Shifts greater than 31 or 63 are undefined. 
+++ * This includes shifts by a negative number. +++ */ +++ mark_reg_unknown(env, regs, insn->dst_reg); +++ break; +++ } +++ +++ /* Upon reaching here, src_known is true and +++ * umax_val is equal to umin_val. +++ */ +++ if (insn_bitness == 32) { +++ dst_reg->smin_value = (u32)(((s32)dst_reg->smin_value) >> umin_val); +++ dst_reg->smax_value = (u32)(((s32)dst_reg->smax_value) >> umin_val); +++ } else { +++ dst_reg->smin_value >>= umin_val; +++ dst_reg->smax_value >>= umin_val; +++ } +++ +++ dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, +++ insn_bitness); +++ +++ /* blow away the dst_reg umin_value/umax_value and rely on +++ * dst_reg var_off to refine the result. +++ */ +++ dst_reg->umin_value = 0; +++ dst_reg->umax_value = U64_MAX; +++ __update_reg_bounds(dst_reg); +++ break; +++ default: +++ mark_reg_unknown(env, regs, insn->dst_reg); +++ break; +++ } +++ +++ if (BPF_CLASS(insn->code) != BPF_ALU64) { +++ /* 32-bit ALU ops are (32,32)->32 */ +++ coerce_reg_to_size(dst_reg, 4); +++ } +++ +++ __reg_deduce_bounds(dst_reg); +++ __reg_bound_offset(dst_reg); +++ return 0; +++} +++ +++/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max +++ * and var_off. +++ */ +++static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, +++ struct bpf_insn *insn) +++{ +++ struct bpf_verifier_state *vstate = env->cur_state; +++ struct bpf_func_state *state = vstate->frame[vstate->curframe]; +++ struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg; +++ struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; +++ u8 opcode = BPF_OP(insn->code); +++ int err; +++ +++ dst_reg = ®s[insn->dst_reg]; +++ src_reg = NULL; +++ if (dst_reg->type != SCALAR_VALUE) +++ ptr_reg = dst_reg; +++ if (BPF_SRC(insn->code) == BPF_X) { +++ src_reg = ®s[insn->src_reg]; +++ if (src_reg->type != SCALAR_VALUE) { +++ if (dst_reg->type != SCALAR_VALUE) { +++ /* Combining two pointers by any ALU op yields +++ * an arbitrary scalar. Disallow all math except +++ * pointer subtraction +++ */ +++ if (opcode == BPF_SUB && env->allow_ptr_leaks) { +++ mark_reg_unknown(env, regs, insn->dst_reg); +++ return 0; +++ } +++ verbose(env, "R%d pointer %s pointer prohibited\n", +++ insn->dst_reg, +++ bpf_alu_string[opcode >> 4]); +++ return -EACCES; +++ } else { +++ /* scalar += pointer +++ * This is legal, but we have to reverse our +++ * src/dest handling in computing the range +++ */ +++ err = mark_chain_precision(env, insn->dst_reg); +++ if (err) +++ return err; +++ return adjust_ptr_min_max_vals(env, insn, +++ src_reg, dst_reg); +++ } +++ } else if (ptr_reg) { +++ /* pointer += scalar */ +++ err = mark_chain_precision(env, insn->src_reg); +++ if (err) +++ return err; +++ return adjust_ptr_min_max_vals(env, insn, +++ dst_reg, src_reg); +++ } +++ } else { +++ /* Pretend the src is a reg with a known value, since we only +++ * need to be able to read from this state. 
+++ */ +++ off_reg.type = SCALAR_VALUE; +++ __mark_reg_known(&off_reg, insn->imm); +++ src_reg = &off_reg; +++ if (ptr_reg) /* pointer += K */ +++ return adjust_ptr_min_max_vals(env, insn, +++ ptr_reg, src_reg); +++ } +++ +++ /* Got here implies adding two SCALAR_VALUEs */ +++ if (WARN_ON_ONCE(ptr_reg)) { +++ print_verifier_state(env, state); +++ verbose(env, "verifier internal error: unexpected ptr_reg\n"); +++ return -EINVAL; +++ } +++ if (WARN_ON(!src_reg)) { +++ print_verifier_state(env, state); +++ verbose(env, "verifier internal error: no src_reg\n"); +++ return -EINVAL; +++ } +++ return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg); +++} +++ ++ /* check validity of 32-bit and 64-bit arithmetic operations */ ++-static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) +++static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) ++ { ++- struct reg_state *regs = env->cur_state.regs; +++ struct bpf_reg_state *regs = cur_regs(env); ++ u8 opcode = BPF_OP(insn->code); ++ int err; ++ ++@@ -1034,30 +5134,31 @@ static int check_alu_op(struct verifier_ ++ if (BPF_SRC(insn->code) != 0 || ++ insn->src_reg != BPF_REG_0 || ++ insn->off != 0 || insn->imm != 0) { ++- verbose("BPF_NEG uses reserved fields\n"); +++ verbose(env, "BPF_NEG uses reserved fields\n"); ++ return -EINVAL; ++ } ++ } else { ++ if (insn->src_reg != BPF_REG_0 || insn->off != 0 || ++- (insn->imm != 16 && insn->imm != 32 && insn->imm != 64)) { ++- verbose("BPF_END uses reserved fields\n"); +++ (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) || +++ BPF_CLASS(insn->code) == BPF_ALU64) { +++ verbose(env, "BPF_END uses reserved fields\n"); ++ return -EINVAL; ++ } ++ } ++ ++ /* check src operand */ ++- err = check_reg_arg(regs, insn->dst_reg, SRC_OP); +++ err = check_reg_arg(env, insn->dst_reg, SRC_OP); ++ if (err) ++ return err; ++ ++ if (is_pointer_value(env, insn->dst_reg)) { ++- verbose("R%d pointer arithmetic prohibited\n", +++ verbose(env, "R%d pointer arithmetic prohibited\n", ++ insn->dst_reg); ++ return -EACCES; ++ } ++ ++ /* check dest operand */ ++- err = check_reg_arg(regs, insn->dst_reg, DST_OP); +++ err = check_reg_arg(env, insn->dst_reg, DST_OP); ++ if (err) ++ return err; ++ ++@@ -1065,81 +5166,100 @@ static int check_alu_op(struct verifier_ ++ ++ if (BPF_SRC(insn->code) == BPF_X) { ++ if (insn->imm != 0 || insn->off != 0) { ++- verbose("BPF_MOV uses reserved fields\n"); +++ verbose(env, "BPF_MOV uses reserved fields\n"); ++ return -EINVAL; ++ } ++ ++ /* check src operand */ ++- err = check_reg_arg(regs, insn->src_reg, SRC_OP); +++ err = check_reg_arg(env, insn->src_reg, SRC_OP); ++ if (err) ++ return err; ++ } else { ++ if (insn->src_reg != BPF_REG_0 || insn->off != 0) { ++- verbose("BPF_MOV uses reserved fields\n"); +++ verbose(env, "BPF_MOV uses reserved fields\n"); ++ return -EINVAL; ++ } ++ } ++ ++- /* check dest operand */ ++- err = check_reg_arg(regs, insn->dst_reg, DST_OP); +++ /* check dest operand, mark as required later */ +++ err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); ++ if (err) ++ return err; ++ ++ if (BPF_SRC(insn->code) == BPF_X) { +++ struct bpf_reg_state *src_reg = regs + insn->src_reg; +++ struct bpf_reg_state *dst_reg = regs + insn->dst_reg; +++ ++ if (BPF_CLASS(insn->code) == BPF_ALU64) { ++ /* case: R1 = R2 ++ * copy register state to dest reg ++ */ ++- regs[insn->dst_reg] = regs[insn->src_reg]; +++ *dst_reg = *src_reg; +++ dst_reg->live |= REG_LIVE_WRITTEN; +++ dst_reg->subreg_def = DEF_NOT_SUBREG; ++ } else { +++ /* R1 = (u32) R2 
*/ ++ if (is_pointer_value(env, insn->src_reg)) { ++- verbose("R%d partial copy of pointer\n", +++ verbose(env, +++ "R%d partial copy of pointer\n", ++ insn->src_reg); ++ return -EACCES; +++ } else if (src_reg->type == SCALAR_VALUE) { +++ *dst_reg = *src_reg; +++ dst_reg->live |= REG_LIVE_WRITTEN; +++ dst_reg->subreg_def = env->insn_idx + 1; +++ } else { +++ mark_reg_unknown(env, regs, +++ insn->dst_reg); ++ } ++- regs[insn->dst_reg].type = UNKNOWN_VALUE; ++- regs[insn->dst_reg].map_ptr = NULL; +++ coerce_reg_to_size(dst_reg, 4); ++ } ++ } else { ++ /* case: R = imm ++ * remember the value we stored into this reg ++ */ ++- regs[insn->dst_reg].type = CONST_IMM; ++- regs[insn->dst_reg].imm = insn->imm; +++ /* clear any state __mark_reg_known doesn't set */ +++ mark_reg_unknown(env, regs, insn->dst_reg); +++ regs[insn->dst_reg].type = SCALAR_VALUE; +++ if (BPF_CLASS(insn->code) == BPF_ALU64) { +++ __mark_reg_known(regs + insn->dst_reg, +++ insn->imm); +++ } else { +++ __mark_reg_known(regs + insn->dst_reg, +++ (u32)insn->imm); +++ } ++ } ++ ++ } else if (opcode > BPF_END) { ++- verbose("invalid BPF_ALU opcode %x\n", opcode); +++ verbose(env, "invalid BPF_ALU opcode %x\n", opcode); ++ return -EINVAL; ++ ++ } else { /* all other ALU ops: and, sub, xor, add, ... */ ++ ++- bool stack_relative = false; ++- ++ if (BPF_SRC(insn->code) == BPF_X) { ++ if (insn->imm != 0 || insn->off != 0) { ++- verbose("BPF_ALU uses reserved fields\n"); +++ verbose(env, "BPF_ALU uses reserved fields\n"); ++ return -EINVAL; ++ } ++ /* check src1 operand */ ++- err = check_reg_arg(regs, insn->src_reg, SRC_OP); +++ err = check_reg_arg(env, insn->src_reg, SRC_OP); ++ if (err) ++ return err; ++ } else { ++ if (insn->src_reg != BPF_REG_0 || insn->off != 0) { ++- verbose("BPF_ALU uses reserved fields\n"); +++ verbose(env, "BPF_ALU uses reserved fields\n"); ++ return -EINVAL; ++ } ++ } ++ ++ /* check src2 operand */ ++- err = check_reg_arg(regs, insn->dst_reg, SRC_OP); +++ err = check_reg_arg(env, insn->dst_reg, SRC_OP); ++ if (err) ++ return err; ++ ++ if ((opcode == BPF_MOD || opcode == BPF_DIV) && ++ BPF_SRC(insn->code) == BPF_K && insn->imm == 0) { ++- verbose("div by zero\n"); +++ verbose(env, "div by zero\n"); ++ return -EINVAL; ++ } ++ ++@@ -1148,185 +5268,980 @@ static int check_alu_op(struct verifier_ ++ int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 
64 : 32; ++ ++ if (insn->imm < 0 || insn->imm >= size) { ++- verbose("invalid shift %d\n", insn->imm); +++ verbose(env, "invalid shift %d\n", insn->imm); ++ return -EINVAL; ++ } ++ } ++ ++- /* pattern match 'bpf_add Rx, imm' instruction */ ++- if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 && ++- regs[insn->dst_reg].type == FRAME_PTR && ++- BPF_SRC(insn->code) == BPF_K) { ++- stack_relative = true; ++- } else if (is_pointer_value(env, insn->dst_reg)) { ++- verbose("R%d pointer arithmetic prohibited\n", ++- insn->dst_reg); ++- return -EACCES; ++- } else if (BPF_SRC(insn->code) == BPF_X && ++- is_pointer_value(env, insn->src_reg)) { ++- verbose("R%d pointer arithmetic prohibited\n", ++- insn->src_reg); ++- return -EACCES; ++- } ++- ++ /* check dest operand */ ++- err = check_reg_arg(regs, insn->dst_reg, DST_OP); +++ err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); ++ if (err) ++ return err; ++ ++- if (stack_relative) { ++- regs[insn->dst_reg].type = PTR_TO_STACK; ++- regs[insn->dst_reg].imm = insn->imm; ++- } +++ return adjust_reg_min_max_vals(env, insn); ++ } ++ ++ return 0; ++ } ++ ++-static int check_cond_jmp_op(struct verifier_env *env, +++static void __find_good_pkt_pointers(struct bpf_func_state *state, +++ struct bpf_reg_state *dst_reg, +++ enum bpf_reg_type type, u16 new_range) +++{ +++ struct bpf_reg_state *reg; +++ int i; +++ +++ for (i = 0; i < MAX_BPF_REG; i++) { +++ reg = &state->regs[i]; +++ if (reg->type == type && reg->id == dst_reg->id) +++ /* keep the maximum range already checked */ +++ reg->range = max(reg->range, new_range); +++ } +++ +++ bpf_for_each_spilled_reg(i, state, reg) { +++ if (!reg) +++ continue; +++ if (reg->type == type && reg->id == dst_reg->id) +++ reg->range = max(reg->range, new_range); +++ } +++} +++ +++static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, +++ struct bpf_reg_state *dst_reg, +++ enum bpf_reg_type type, +++ bool range_right_open) +++{ +++ u16 new_range; +++ int i; +++ +++ if (dst_reg->off < 0 || +++ (dst_reg->off == 0 && range_right_open)) +++ /* This doesn't give us any range */ +++ return; +++ +++ if (dst_reg->umax_value > MAX_PACKET_OFF || +++ dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF) +++ /* Risk of overflow. For instance, ptr + (1<<63) may be less +++ * than pkt_end, but that's because it's also less than pkt. +++ */ +++ return; +++ +++ new_range = dst_reg->off; +++ if (range_right_open) +++ new_range--; +++ +++ /* Examples for register markings: +++ * +++ * pkt_data in dst register: +++ * +++ * r2 = r3; +++ * r2 += 8; +++ * if (r2 > pkt_end) goto +++ * +++ * +++ * r2 = r3; +++ * r2 += 8; +++ * if (r2 < pkt_end) goto +++ * +++ * +++ * Where: +++ * r2 == dst_reg, pkt_end == src_reg +++ * r2=pkt(id=n,off=8,r=0) +++ * r3=pkt(id=n,off=0,r=0) +++ * +++ * pkt_data in src register: +++ * +++ * r2 = r3; +++ * r2 += 8; +++ * if (pkt_end >= r2) goto +++ * +++ * +++ * r2 = r3; +++ * r2 += 8; +++ * if (pkt_end <= r2) goto +++ * +++ * +++ * Where: +++ * pkt_end == dst_reg, r2 == src_reg +++ * r2=pkt(id=n,off=8,r=0) +++ * r3=pkt(id=n,off=0,r=0) +++ * +++ * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8) +++ * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8) +++ * and [r3, r3 + 8-1) respectively is safe to access depending on +++ * the check. +++ */ +++ +++ /* If our ids match, then we must have the same max_value. And we +++ * don't care about the other reg's fixed offset, since if it's too big +++ * the range won't allow anything. 
+++ * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16. +++ */ +++ for (i = 0; i <= vstate->curframe; i++) +++ __find_good_pkt_pointers(vstate->frame[i], dst_reg, type, +++ new_range); +++} +++ +++/* compute branch direction of the expression "if (reg opcode val) goto target;" +++ * and return: +++ * 1 - branch will be taken and "goto target" will be executed +++ * 0 - branch will not be taken and fall-through to next insn +++ * -1 - unknown. Example: "if (reg < 5)" is unknown when register value range [0,10] +++ */ +++static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode, +++ bool is_jmp32) +++{ +++ struct bpf_reg_state reg_lo; +++ s64 sval; +++ +++ if (__is_pointer_value(false, reg)) +++ return -1; +++ +++ if (is_jmp32) { +++ reg_lo = *reg; +++ reg = ®_lo; +++ /* For JMP32, only low 32 bits are compared, coerce_reg_to_size +++ * could truncate high bits and update umin/umax according to +++ * information of low bits. +++ */ +++ coerce_reg_to_size(reg, 4); +++ /* smin/smax need special handling. For example, after coerce, +++ * if smin_value is 0x00000000ffffffffLL, the value is -1 when +++ * used as operand to JMP32. It is a negative number from s32's +++ * point of view, while it is a positive number when seen as +++ * s64. The smin/smax are kept as s64, therefore, when used with +++ * JMP32, they need to be transformed into s32, then sign +++ * extended back to s64. +++ * +++ * Also, smin/smax were copied from umin/umax. If umin/umax has +++ * different sign bit, then min/max relationship doesn't +++ * maintain after casting into s32, for this case, set smin/smax +++ * to safest range. +++ */ +++ if ((reg->umax_value ^ reg->umin_value) & +++ (1ULL << 31)) { +++ reg->smin_value = S32_MIN; +++ reg->smax_value = S32_MAX; +++ } +++ reg->smin_value = (s64)(s32)reg->smin_value; +++ reg->smax_value = (s64)(s32)reg->smax_value; +++ +++ val = (u32)val; +++ sval = (s64)(s32)val; +++ } else { +++ sval = (s64)val; +++ } +++ +++ switch (opcode) { +++ case BPF_JEQ: +++ if (tnum_is_const(reg->var_off)) +++ return !!tnum_equals_const(reg->var_off, val); +++ break; +++ case BPF_JNE: +++ if (tnum_is_const(reg->var_off)) +++ return !tnum_equals_const(reg->var_off, val); +++ break; +++ case BPF_JSET: +++ if ((~reg->var_off.mask & reg->var_off.value) & val) +++ return 1; +++ if (!((reg->var_off.mask | reg->var_off.value) & val)) +++ return 0; +++ break; +++ case BPF_JGT: +++ if (reg->umin_value > val) +++ return 1; +++ else if (reg->umax_value <= val) +++ return 0; +++ break; +++ case BPF_JSGT: +++ if (reg->smin_value > sval) +++ return 1; +++ else if (reg->smax_value < sval) +++ return 0; +++ break; +++ case BPF_JLT: +++ if (reg->umax_value < val) +++ return 1; +++ else if (reg->umin_value >= val) +++ return 0; +++ break; +++ case BPF_JSLT: +++ if (reg->smax_value < sval) +++ return 1; +++ else if (reg->smin_value >= sval) +++ return 0; +++ break; +++ case BPF_JGE: +++ if (reg->umin_value >= val) +++ return 1; +++ else if (reg->umax_value < val) +++ return 0; +++ break; +++ case BPF_JSGE: +++ if (reg->smin_value >= sval) +++ return 1; +++ else if (reg->smax_value < sval) +++ return 0; +++ break; +++ case BPF_JLE: +++ if (reg->umax_value <= val) +++ return 1; +++ else if (reg->umin_value > val) +++ return 0; +++ break; +++ case BPF_JSLE: +++ if (reg->smax_value <= sval) +++ return 1; +++ else if (reg->smin_value > sval) +++ return 0; +++ break; +++ } +++ +++ return -1; +++} +++ +++/* Generate min value of the high 32-bit from TNUM info. 
*/ +++static u64 gen_hi_min(struct tnum var) +++{ +++ return var.value & ~0xffffffffULL; +++} +++ +++/* Generate max value of the high 32-bit from TNUM info. */ +++static u64 gen_hi_max(struct tnum var) +++{ +++ return (var.value | var.mask) & ~0xffffffffULL; +++} +++ +++/* Return true if VAL is compared with a s64 sign extended from s32, and they +++ * are with the same signedness. +++ */ +++static bool cmp_val_with_extended_s64(s64 sval, struct bpf_reg_state *reg) +++{ +++ return ((s32)sval >= 0 && +++ reg->smin_value >= 0 && reg->smax_value <= S32_MAX) || +++ ((s32)sval < 0 && +++ reg->smax_value <= 0 && reg->smin_value >= S32_MIN); +++} +++ +++/* Constrain the possible values of @reg with unsigned upper bound @bound. +++ * If @is_exclusive, @bound is an exclusive limit, otherwise it is inclusive. +++ * If @is_jmp32, @bound is a 32-bit value that only constrains the low 32 bits +++ * of @reg. +++ */ +++static void set_upper_bound(struct bpf_reg_state *reg, u64 bound, bool is_jmp32, +++ bool is_exclusive) +++{ +++ if (is_exclusive) { +++ /* There are no values for `reg` that make `reg<0` true. */ +++ if (bound == 0) +++ return; +++ bound--; +++ } +++ if (is_jmp32) { +++ /* Constrain the register's value in the tnum representation. +++ * For 64-bit comparisons this happens later in +++ * __reg_bound_offset(), but for 32-bit comparisons, we can be +++ * more precise than what can be derived from the updated +++ * numeric bounds. +++ */ +++ struct tnum t = tnum_range(0, bound); +++ +++ t.mask |= ~0xffffffffULL; /* upper half is unknown */ +++ reg->var_off = tnum_intersect(reg->var_off, t); +++ +++ /* Compute the 64-bit bound from the 32-bit bound. */ +++ bound += gen_hi_max(reg->var_off); +++ } +++ reg->umax_value = min(reg->umax_value, bound); +++} +++ +++/* Constrain the possible values of @reg with unsigned lower bound @bound. +++ * If @is_exclusive, @bound is an exclusive limit, otherwise it is inclusive. +++ * If @is_jmp32, @bound is a 32-bit value that only constrains the low 32 bits +++ * of @reg. +++ */ +++static void set_lower_bound(struct bpf_reg_state *reg, u64 bound, bool is_jmp32, +++ bool is_exclusive) +++{ +++ if (is_exclusive) { +++ /* There are no values for `reg` that make `reg>MAX` true. */ +++ if (bound == (is_jmp32 ? U32_MAX : U64_MAX)) +++ return; +++ bound++; +++ } +++ if (is_jmp32) { +++ /* Constrain the register's value in the tnum representation. +++ * For 64-bit comparisons this happens later in +++ * __reg_bound_offset(), but for 32-bit comparisons, we can be +++ * more precise than what can be derived from the updated +++ * numeric bounds. +++ */ +++ struct tnum t = tnum_range(bound, U32_MAX); +++ +++ t.mask |= ~0xffffffffULL; /* upper half is unknown */ +++ reg->var_off = tnum_intersect(reg->var_off, t); +++ +++ /* Compute the 64-bit bound from the 32-bit bound. */ +++ bound += gen_hi_min(reg->var_off); +++ } +++ reg->umin_value = max(reg->umin_value, bound); +++} +++ +++/* Adjusts the register min/max values in the case that the dst_reg is the +++ * variable register that we are working on, and src_reg is a constant or we're +++ * simply doing a BPF_K check. +++ * In JEQ/JNE cases we also adjust the var_off values. 
+++ */ +++static void reg_set_min_max(struct bpf_reg_state *true_reg, +++ struct bpf_reg_state *false_reg, u64 val, +++ u8 opcode, bool is_jmp32) +++{ +++ s64 sval; +++ +++ /* If the dst_reg is a pointer, we can't learn anything about its +++ * variable offset from the compare (unless src_reg were a pointer into +++ * the same object, but we don't bother with that. +++ * Since false_reg and true_reg have the same type by construction, we +++ * only need to check one of them for pointerness. +++ */ +++ if (__is_pointer_value(false, false_reg)) +++ return; +++ +++ val = is_jmp32 ? (u32)val : val; +++ sval = is_jmp32 ? (s64)(s32)val : (s64)val; +++ +++ switch (opcode) { +++ case BPF_JEQ: +++ case BPF_JNE: +++ { +++ struct bpf_reg_state *reg = +++ opcode == BPF_JEQ ? true_reg : false_reg; +++ +++ /* For BPF_JEQ, if this is false we know nothing Jon Snow, but +++ * if it is true we know the value for sure. Likewise for +++ * BPF_JNE. +++ */ +++ if (is_jmp32) { +++ u64 old_v = reg->var_off.value; +++ u64 hi_mask = ~0xffffffffULL; +++ +++ reg->var_off.value = (old_v & hi_mask) | val; +++ reg->var_off.mask &= hi_mask; +++ } else { +++ __mark_reg_known(reg, val); +++ } +++ break; +++ } +++ case BPF_JSET: +++ false_reg->var_off = tnum_and(false_reg->var_off, +++ tnum_const(~val)); +++ if (is_power_of_2(val)) +++ true_reg->var_off = tnum_or(true_reg->var_off, +++ tnum_const(val)); +++ break; +++ case BPF_JGE: +++ case BPF_JGT: +++ { +++ set_upper_bound(false_reg, val, is_jmp32, opcode == BPF_JGE); +++ set_lower_bound(true_reg, val, is_jmp32, opcode == BPF_JGT); +++ break; +++ } +++ case BPF_JSGE: +++ case BPF_JSGT: +++ { +++ s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1; +++ s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval; +++ +++ /* If the full s64 was not sign-extended from s32 then don't +++ * deduct further info. +++ */ +++ if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) +++ break; +++ false_reg->smax_value = min(false_reg->smax_value, false_smax); +++ true_reg->smin_value = max(true_reg->smin_value, true_smin); +++ break; +++ } +++ case BPF_JLE: +++ case BPF_JLT: +++ { +++ set_lower_bound(false_reg, val, is_jmp32, opcode == BPF_JLE); +++ set_upper_bound(true_reg, val, is_jmp32, opcode == BPF_JLT); +++ break; +++ } +++ case BPF_JSLE: +++ case BPF_JSLT: +++ { +++ s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1; +++ s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval; +++ +++ if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) +++ break; +++ false_reg->smin_value = max(false_reg->smin_value, false_smin); +++ true_reg->smax_value = min(true_reg->smax_value, true_smax); +++ break; +++ } +++ default: +++ break; +++ } +++ +++ __reg_deduce_bounds(false_reg); +++ __reg_deduce_bounds(true_reg); +++ /* We might have learned some bits from the bounds. */ +++ __reg_bound_offset(false_reg); +++ __reg_bound_offset(true_reg); +++ /* Intersecting with the old var_off might have improved our bounds +++ * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), +++ * then new var_off is (0; 0x7f...fc) which improves our umax. +++ */ +++ __update_reg_bounds(false_reg); +++ __update_reg_bounds(true_reg); +++} +++ +++/* Same as above, but for the case that dst_reg holds a constant and src_reg is +++ * the variable reg. 
+++ */ +++static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, +++ struct bpf_reg_state *false_reg, u64 val, +++ u8 opcode, bool is_jmp32) +++{ +++ s64 sval; +++ +++ if (__is_pointer_value(false, false_reg)) +++ return; +++ +++ val = is_jmp32 ? (u32)val : val; +++ sval = is_jmp32 ? (s64)(s32)val : (s64)val; +++ +++ switch (opcode) { +++ case BPF_JEQ: +++ case BPF_JNE: +++ { +++ struct bpf_reg_state *reg = +++ opcode == BPF_JEQ ? true_reg : false_reg; +++ +++ if (is_jmp32) { +++ u64 old_v = reg->var_off.value; +++ u64 hi_mask = ~0xffffffffULL; +++ +++ reg->var_off.value = (old_v & hi_mask) | val; +++ reg->var_off.mask &= hi_mask; +++ } else { +++ __mark_reg_known(reg, val); +++ } +++ break; +++ } +++ case BPF_JSET: +++ false_reg->var_off = tnum_and(false_reg->var_off, +++ tnum_const(~val)); +++ if (is_power_of_2(val)) +++ true_reg->var_off = tnum_or(true_reg->var_off, +++ tnum_const(val)); +++ break; +++ case BPF_JGE: +++ case BPF_JGT: +++ { +++ set_lower_bound(false_reg, val, is_jmp32, opcode == BPF_JGE); +++ set_upper_bound(true_reg, val, is_jmp32, opcode == BPF_JGT); +++ break; +++ } +++ case BPF_JSGE: +++ case BPF_JSGT: +++ { +++ s64 false_smin = opcode == BPF_JSGT ? sval : sval + 1; +++ s64 true_smax = opcode == BPF_JSGT ? sval - 1 : sval; +++ +++ if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) +++ break; +++ false_reg->smin_value = max(false_reg->smin_value, false_smin); +++ true_reg->smax_value = min(true_reg->smax_value, true_smax); +++ break; +++ } +++ case BPF_JLE: +++ case BPF_JLT: +++ { +++ set_upper_bound(false_reg, val, is_jmp32, opcode == BPF_JLE); +++ set_lower_bound(true_reg, val, is_jmp32, opcode == BPF_JLT); +++ break; +++ } +++ case BPF_JSLE: +++ case BPF_JSLT: +++ { +++ s64 false_smax = opcode == BPF_JSLT ? sval : sval - 1; +++ s64 true_smin = opcode == BPF_JSLT ? sval + 1 : sval; +++ +++ if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) +++ break; +++ false_reg->smax_value = min(false_reg->smax_value, false_smax); +++ true_reg->smin_value = max(true_reg->smin_value, true_smin); +++ break; +++ } +++ default: +++ break; +++ } +++ +++ __reg_deduce_bounds(false_reg); +++ __reg_deduce_bounds(true_reg); +++ /* We might have learned some bits from the bounds. */ +++ __reg_bound_offset(false_reg); +++ __reg_bound_offset(true_reg); +++ /* Intersecting with the old var_off might have improved our bounds +++ * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), +++ * then new var_off is (0; 0x7f...fc) which improves our umax. +++ */ +++ __update_reg_bounds(false_reg); +++ __update_reg_bounds(true_reg); +++} +++ +++/* Regs are known to be equal, so intersect their min/max/var_off */ +++static void __reg_combine_min_max(struct bpf_reg_state *src_reg, +++ struct bpf_reg_state *dst_reg) +++{ +++ src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value, +++ dst_reg->umin_value); +++ src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value, +++ dst_reg->umax_value); +++ src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value, +++ dst_reg->smin_value); +++ src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value, +++ dst_reg->smax_value); +++ src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off, +++ dst_reg->var_off); +++ /* We might have learned new bounds from the var_off. */ +++ __update_reg_bounds(src_reg); +++ __update_reg_bounds(dst_reg); +++ /* We might have learned something about the sign bit. 
*/ +++ __reg_deduce_bounds(src_reg); +++ __reg_deduce_bounds(dst_reg); +++ /* We might have learned some bits from the bounds. */ +++ __reg_bound_offset(src_reg); +++ __reg_bound_offset(dst_reg); +++ /* Intersecting with the old var_off might have improved our bounds +++ * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), +++ * then new var_off is (0; 0x7f...fc) which improves our umax. +++ */ +++ __update_reg_bounds(src_reg); +++ __update_reg_bounds(dst_reg); +++} +++ +++static void reg_combine_min_max(struct bpf_reg_state *true_src, +++ struct bpf_reg_state *true_dst, +++ struct bpf_reg_state *false_src, +++ struct bpf_reg_state *false_dst, +++ u8 opcode) +++{ +++ switch (opcode) { +++ case BPF_JEQ: +++ __reg_combine_min_max(true_src, true_dst); +++ break; +++ case BPF_JNE: +++ __reg_combine_min_max(false_src, false_dst); +++ break; +++ } +++} +++ +++static void mark_ptr_or_null_reg(struct bpf_func_state *state, +++ struct bpf_reg_state *reg, u32 id, +++ bool is_null) +++{ +++ if (reg_type_may_be_null(reg->type) && reg->id == id) { +++ /* Old offset (both fixed and variable parts) should +++ * have been known-zero, because we don't allow pointer +++ * arithmetic on pointers that might be NULL. +++ */ +++ if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || +++ !tnum_equals_const(reg->var_off, 0) || +++ reg->off)) { +++ __mark_reg_known_zero(reg); +++ reg->off = 0; +++ } +++ if (is_null) { +++ reg->type = SCALAR_VALUE; +++ } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) { +++ if (reg->map_ptr->inner_map_meta) { +++ reg->type = CONST_PTR_TO_MAP; +++ reg->map_ptr = reg->map_ptr->inner_map_meta; +++ } else if (reg->map_ptr->map_type == +++ BPF_MAP_TYPE_XSKMAP) { +++ reg->type = PTR_TO_XDP_SOCK; +++ } else { +++ reg->type = PTR_TO_MAP_VALUE; +++ } +++ } else if (reg->type == PTR_TO_SOCKET_OR_NULL) { +++ reg->type = PTR_TO_SOCKET; +++ } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) { +++ reg->type = PTR_TO_SOCK_COMMON; +++ } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) { +++ reg->type = PTR_TO_TCP_SOCK; +++ } +++ if (is_null) { +++ /* We don't need id and ref_obj_id from this point +++ * onwards anymore, thus we should better reset it, +++ * so that state pruning has chances to take effect. +++ */ +++ reg->id = 0; +++ reg->ref_obj_id = 0; +++ } else if (!reg_may_point_to_spin_lock(reg)) { +++ /* For not-NULL ptr, reg->ref_obj_id will be reset +++ * in release_reg_references(). +++ * +++ * reg->id is still used by spin_lock ptr. Other +++ * than spin_lock ptr type, reg->id can be reset. +++ */ +++ reg->id = 0; +++ } +++ } +++} +++ +++static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id, +++ bool is_null) +++{ +++ struct bpf_reg_state *reg; +++ int i; +++ +++ for (i = 0; i < MAX_BPF_REG; i++) +++ mark_ptr_or_null_reg(state, &state->regs[i], id, is_null); +++ +++ bpf_for_each_spilled_reg(i, state, reg) { +++ if (!reg) +++ continue; +++ mark_ptr_or_null_reg(state, reg, id, is_null); +++ } +++} +++ +++/* The logic is similar to find_good_pkt_pointers(), both could eventually +++ * be folded together at some point. +++ */ +++static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, +++ bool is_null) +++{ +++ struct bpf_func_state *state = vstate->frame[vstate->curframe]; +++ struct bpf_reg_state *regs = state->regs; +++ u32 ref_obj_id = regs[regno].ref_obj_id; +++ u32 id = regs[regno].id; +++ int i; +++ +++ if (ref_obj_id && ref_obj_id == id && is_null) +++ /* regs[regno] is in the " == NULL" branch. 
+++ * No one could have freed the reference state before +++ * doing the NULL check. +++ */ +++ WARN_ON_ONCE(release_reference_state(state, id)); +++ +++ for (i = 0; i <= vstate->curframe; i++) +++ __mark_ptr_or_null_regs(vstate->frame[i], id, is_null); +++} +++ +++static bool try_match_pkt_pointers(const struct bpf_insn *insn, +++ struct bpf_reg_state *dst_reg, +++ struct bpf_reg_state *src_reg, +++ struct bpf_verifier_state *this_branch, +++ struct bpf_verifier_state *other_branch) +++{ +++ if (BPF_SRC(insn->code) != BPF_X) +++ return false; +++ +++ /* Pointers are always 64-bit. */ +++ if (BPF_CLASS(insn->code) == BPF_JMP32) +++ return false; +++ +++ switch (BPF_OP(insn->code)) { +++ case BPF_JGT: +++ if ((dst_reg->type == PTR_TO_PACKET && +++ src_reg->type == PTR_TO_PACKET_END) || +++ (dst_reg->type == PTR_TO_PACKET_META && +++ reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { +++ /* pkt_data' > pkt_end, pkt_meta' > pkt_data */ +++ find_good_pkt_pointers(this_branch, dst_reg, +++ dst_reg->type, false); +++ } else if ((dst_reg->type == PTR_TO_PACKET_END && +++ src_reg->type == PTR_TO_PACKET) || +++ (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && +++ src_reg->type == PTR_TO_PACKET_META)) { +++ /* pkt_end > pkt_data', pkt_data > pkt_meta' */ +++ find_good_pkt_pointers(other_branch, src_reg, +++ src_reg->type, true); +++ } else { +++ return false; +++ } +++ break; +++ case BPF_JLT: +++ if ((dst_reg->type == PTR_TO_PACKET && +++ src_reg->type == PTR_TO_PACKET_END) || +++ (dst_reg->type == PTR_TO_PACKET_META && +++ reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { +++ /* pkt_data' < pkt_end, pkt_meta' < pkt_data */ +++ find_good_pkt_pointers(other_branch, dst_reg, +++ dst_reg->type, true); +++ } else if ((dst_reg->type == PTR_TO_PACKET_END && +++ src_reg->type == PTR_TO_PACKET) || +++ (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && +++ src_reg->type == PTR_TO_PACKET_META)) { +++ /* pkt_end < pkt_data', pkt_data > pkt_meta' */ +++ find_good_pkt_pointers(this_branch, src_reg, +++ src_reg->type, false); +++ } else { +++ return false; +++ } +++ break; +++ case BPF_JGE: +++ if ((dst_reg->type == PTR_TO_PACKET && +++ src_reg->type == PTR_TO_PACKET_END) || +++ (dst_reg->type == PTR_TO_PACKET_META && +++ reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { +++ /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */ +++ find_good_pkt_pointers(this_branch, dst_reg, +++ dst_reg->type, true); +++ } else if ((dst_reg->type == PTR_TO_PACKET_END && +++ src_reg->type == PTR_TO_PACKET) || +++ (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && +++ src_reg->type == PTR_TO_PACKET_META)) { +++ /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */ +++ find_good_pkt_pointers(other_branch, src_reg, +++ src_reg->type, false); +++ } else { +++ return false; +++ } +++ break; +++ case BPF_JLE: +++ if ((dst_reg->type == PTR_TO_PACKET && +++ src_reg->type == PTR_TO_PACKET_END) || +++ (dst_reg->type == PTR_TO_PACKET_META && +++ reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { +++ /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */ +++ find_good_pkt_pointers(other_branch, dst_reg, +++ dst_reg->type, false); +++ } else if ((dst_reg->type == PTR_TO_PACKET_END && +++ src_reg->type == PTR_TO_PACKET) || +++ (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && +++ src_reg->type == PTR_TO_PACKET_META)) { +++ /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */ +++ find_good_pkt_pointers(this_branch, src_reg, +++ src_reg->type, true); +++ } else { +++ return false; +++ } +++ break; +++ default: +++ return false; +++ } +++ +++ 
return true; +++} +++ +++static int check_cond_jmp_op(struct bpf_verifier_env *env, ++ struct bpf_insn *insn, int *insn_idx) ++ { ++- struct reg_state *regs = env->cur_state.regs; ++- struct verifier_state *other_branch; +++ struct bpf_verifier_state *this_branch = env->cur_state; +++ struct bpf_verifier_state *other_branch; +++ struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; +++ struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL; ++ u8 opcode = BPF_OP(insn->code); +++ bool is_jmp32; +++ int pred = -1; ++ int err; ++ ++- if (opcode > BPF_EXIT) { ++- verbose("invalid BPF_JMP opcode %x\n", opcode); +++ /* Only conditional jumps are expected to reach here. */ +++ if (opcode == BPF_JA || opcode > BPF_JSLE) { +++ verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode); ++ return -EINVAL; ++ } ++ ++ if (BPF_SRC(insn->code) == BPF_X) { ++ if (insn->imm != 0) { ++- verbose("BPF_JMP uses reserved fields\n"); +++ verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); ++ return -EINVAL; ++ } ++ ++ /* check src1 operand */ ++- err = check_reg_arg(regs, insn->src_reg, SRC_OP); +++ err = check_reg_arg(env, insn->src_reg, SRC_OP); ++ if (err) ++ return err; ++ ++ if (is_pointer_value(env, insn->src_reg)) { ++- verbose("R%d pointer comparison prohibited\n", +++ verbose(env, "R%d pointer comparison prohibited\n", ++ insn->src_reg); ++ return -EACCES; ++ } +++ src_reg = ®s[insn->src_reg]; ++ } else { ++ if (insn->src_reg != BPF_REG_0) { ++- verbose("BPF_JMP uses reserved fields\n"); +++ verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); ++ return -EINVAL; ++ } ++ } ++ ++ /* check src2 operand */ ++- err = check_reg_arg(regs, insn->dst_reg, SRC_OP); +++ err = check_reg_arg(env, insn->dst_reg, SRC_OP); ++ if (err) ++ return err; ++ ++- /* detect if R == 0 where R was initialized to zero earlier */ ++- if (BPF_SRC(insn->code) == BPF_K && ++- (opcode == BPF_JEQ || opcode == BPF_JNE) && ++- regs[insn->dst_reg].type == CONST_IMM && ++- regs[insn->dst_reg].imm == insn->imm) { ++- if (opcode == BPF_JEQ) { ++- /* if (imm == imm) goto pc+off; ++- * only follow the goto, ignore fall-through ++- */ ++- *insn_idx += insn->off; ++- return 0; ++- } else { ++- /* if (imm != imm) goto pc+off; ++- * only follow fall-through branch, since ++- * that's where the program will go ++- */ ++- return 0; ++- } +++ dst_reg = ®s[insn->dst_reg]; +++ is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; +++ +++ if (BPF_SRC(insn->code) == BPF_K) +++ pred = is_branch_taken(dst_reg, insn->imm, +++ opcode, is_jmp32); +++ else if (src_reg->type == SCALAR_VALUE && +++ tnum_is_const(src_reg->var_off)) +++ pred = is_branch_taken(dst_reg, src_reg->var_off.value, +++ opcode, is_jmp32); +++ if (pred >= 0) { +++ err = mark_chain_precision(env, insn->dst_reg); +++ if (BPF_SRC(insn->code) == BPF_X && !err) +++ err = mark_chain_precision(env, insn->src_reg); +++ if (err) +++ return err; +++ } +++ +++ if (pred == 1) { +++ /* Only follow the goto, ignore fall-through. If needed, push +++ * the fall-through branch for simulation under speculative +++ * execution. +++ */ +++ if (!env->allow_ptr_leaks && +++ !sanitize_speculative_path(env, insn, *insn_idx + 1, +++ *insn_idx)) +++ return -EFAULT; +++ *insn_idx += insn->off; +++ return 0; +++ } else if (pred == 0) { +++ /* Only follow the fall-through branch, since that's where the +++ * program will go. If needed, push the goto branch for +++ * simulation under speculative execution. 
+++ */ +++ if (!env->allow_ptr_leaks && +++ !sanitize_speculative_path(env, insn, +++ *insn_idx + insn->off + 1, +++ *insn_idx)) +++ return -EFAULT; +++ return 0; ++ } ++ ++- other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx); +++ other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, +++ false); ++ if (!other_branch) ++ return -EFAULT; +++ other_branch_regs = other_branch->frame[other_branch->curframe]->regs; ++ ++- /* detect if R == 0 where R is returned value from bpf_map_lookup_elem() */ ++- if (BPF_SRC(insn->code) == BPF_K && ++- insn->imm == 0 && (opcode == BPF_JEQ || ++- opcode == BPF_JNE) && ++- regs[insn->dst_reg].type == PTR_TO_MAP_VALUE_OR_NULL) { ++- if (opcode == BPF_JEQ) { ++- /* next fallthrough insn can access memory via ++- * this register ++- */ ++- regs[insn->dst_reg].type = PTR_TO_MAP_VALUE; ++- /* branch targer cannot access it, since reg == 0 */ ++- other_branch->regs[insn->dst_reg].type = CONST_IMM; ++- other_branch->regs[insn->dst_reg].imm = 0; ++- } else { ++- other_branch->regs[insn->dst_reg].type = PTR_TO_MAP_VALUE; ++- regs[insn->dst_reg].type = CONST_IMM; ++- regs[insn->dst_reg].imm = 0; ++- } ++- } else if (is_pointer_value(env, insn->dst_reg)) { ++- verbose("R%d pointer comparison prohibited\n", insn->dst_reg); +++ /* detect if we are comparing against a constant value so we can adjust +++ * our min/max values for our dst register. +++ * this is only legit if both are scalars (or pointers to the same +++ * object, I suppose, but we don't support that right now), because +++ * otherwise the different base pointers mean the offsets aren't +++ * comparable. +++ */ +++ if (BPF_SRC(insn->code) == BPF_X) { +++ struct bpf_reg_state *src_reg = ®s[insn->src_reg]; +++ struct bpf_reg_state lo_reg0 = *dst_reg; +++ struct bpf_reg_state lo_reg1 = *src_reg; +++ struct bpf_reg_state *src_lo, *dst_lo; +++ +++ dst_lo = &lo_reg0; +++ src_lo = &lo_reg1; +++ coerce_reg_to_size(dst_lo, 4); +++ coerce_reg_to_size(src_lo, 4); +++ +++ if (dst_reg->type == SCALAR_VALUE && +++ src_reg->type == SCALAR_VALUE) { +++ if (tnum_is_const(src_reg->var_off) || +++ (is_jmp32 && tnum_is_const(src_lo->var_off))) +++ reg_set_min_max(&other_branch_regs[insn->dst_reg], +++ dst_reg, +++ is_jmp32 +++ ? src_lo->var_off.value +++ : src_reg->var_off.value, +++ opcode, is_jmp32); +++ else if (tnum_is_const(dst_reg->var_off) || +++ (is_jmp32 && tnum_is_const(dst_lo->var_off))) +++ reg_set_min_max_inv(&other_branch_regs[insn->src_reg], +++ src_reg, +++ is_jmp32 +++ ? dst_lo->var_off.value +++ : dst_reg->var_off.value, +++ opcode, is_jmp32); +++ else if (!is_jmp32 && +++ (opcode == BPF_JEQ || opcode == BPF_JNE)) +++ /* Comparing for equality, we can combine knowledge */ +++ reg_combine_min_max(&other_branch_regs[insn->src_reg], +++ &other_branch_regs[insn->dst_reg], +++ src_reg, dst_reg, opcode); +++ } +++ } else if (dst_reg->type == SCALAR_VALUE) { +++ reg_set_min_max(&other_branch_regs[insn->dst_reg], +++ dst_reg, insn->imm, opcode, is_jmp32); +++ } +++ +++ /* detect if R == 0 where R is returned from bpf_map_lookup_elem(). +++ * NOTE: these optimizations below are related with pointer comparison +++ * which will never be JMP32. +++ */ +++ if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K && +++ insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && +++ reg_type_may_be_null(dst_reg->type)) { +++ /* Mark all identical registers in each branch as either +++ * safe or unknown depending R == 0 or R != 0 conditional. 
+++ */ +++ mark_ptr_or_null_regs(this_branch, insn->dst_reg, +++ opcode == BPF_JNE); +++ mark_ptr_or_null_regs(other_branch, insn->dst_reg, +++ opcode == BPF_JEQ); +++ } else if (!try_match_pkt_pointers(insn, dst_reg, ®s[insn->src_reg], +++ this_branch, other_branch) && +++ is_pointer_value(env, insn->dst_reg)) { +++ verbose(env, "R%d pointer comparison prohibited\n", +++ insn->dst_reg); ++ return -EACCES; ++- } else if (BPF_SRC(insn->code) == BPF_K && ++- (opcode == BPF_JEQ || opcode == BPF_JNE)) { ++- ++- if (opcode == BPF_JEQ) { ++- /* detect if (R == imm) goto ++- * and in the target state recognize that R = imm ++- */ ++- other_branch->regs[insn->dst_reg].type = CONST_IMM; ++- other_branch->regs[insn->dst_reg].imm = insn->imm; ++- } else { ++- /* detect if (R != imm) goto ++- * and in the fall-through state recognize that R = imm ++- */ ++- regs[insn->dst_reg].type = CONST_IMM; ++- regs[insn->dst_reg].imm = insn->imm; ++- } ++ } ++- if (log_level) ++- print_verifier_state(env); +++ if (env->log.level & BPF_LOG_LEVEL) +++ print_verifier_state(env, this_branch->frame[this_branch->curframe]); ++ return 0; ++ } ++ ++-/* return the map pointer stored inside BPF_LD_IMM64 instruction */ ++-static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn) ++-{ ++- u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32; ++- ++- return (struct bpf_map *) (unsigned long) imm64; ++-} ++- ++ /* verify BPF_LD_IMM64 instruction */ ++-static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn) +++static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) ++ { ++- struct reg_state *regs = env->cur_state.regs; +++ struct bpf_insn_aux_data *aux = cur_aux(env); +++ struct bpf_reg_state *regs = cur_regs(env); +++ struct bpf_map *map; ++ int err; ++ ++ if (BPF_SIZE(insn->code) != BPF_DW) { ++- verbose("invalid BPF_LD_IMM insn\n"); +++ verbose(env, "invalid BPF_LD_IMM insn\n"); ++ return -EINVAL; ++ } ++ if (insn->off != 0) { ++- verbose("BPF_LD_IMM64 uses reserved fields\n"); +++ verbose(env, "BPF_LD_IMM64 uses reserved fields\n"); ++ return -EINVAL; ++ } ++ ++- err = check_reg_arg(regs, insn->dst_reg, DST_OP); +++ err = check_reg_arg(env, insn->dst_reg, DST_OP); ++ if (err) ++ return err; ++ ++- if (insn->src_reg == 0) ++- /* generic move 64-bit immediate into a register */ +++ if (insn->src_reg == 0) { +++ u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; +++ +++ regs[insn->dst_reg].type = SCALAR_VALUE; +++ __mark_reg_known(®s[insn->dst_reg], imm); ++ return 0; +++ } ++ ++- /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */ ++- BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD); +++ map = env->used_maps[aux->map_index]; +++ mark_reg_known_zero(env, regs, insn->dst_reg); +++ regs[insn->dst_reg].map_ptr = map; +++ +++ if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) { +++ regs[insn->dst_reg].type = PTR_TO_MAP_VALUE; +++ regs[insn->dst_reg].off = aux->map_off; +++ if (map_value_has_spin_lock(map)) +++ regs[insn->dst_reg].id = ++env->id_gen; +++ } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) { +++ regs[insn->dst_reg].type = CONST_PTR_TO_MAP; +++ } else { +++ verbose(env, "bpf verifier is misconfigured\n"); +++ return -EINVAL; +++ } ++ ++- regs[insn->dst_reg].type = CONST_PTR_TO_MAP; ++- regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn); ++ return 0; ++ } ++ ++@@ -1357,53 +6272,148 @@ static bool may_access_skb(enum bpf_prog ++ * Output: ++ * R0 - 8/16/32-bit skb data converted to cpu endianness ++ */ ++-static int check_ld_abs(struct 
verifier_env *env, struct bpf_insn *insn) +++static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) ++ { ++- struct reg_state *regs = env->cur_state.regs; +++ struct bpf_reg_state *regs = cur_regs(env); +++ static const int ctx_reg = BPF_REG_6; ++ u8 mode = BPF_MODE(insn->code); ++- struct reg_state *reg; ++ int i, err; ++ ++ if (!may_access_skb(env->prog->type)) { ++- verbose("BPF_LD_ABS|IND instructions not allowed for this program type\n"); +++ verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n"); +++ return -EINVAL; +++ } +++ +++ if (!env->ops->gen_ld_abs) { +++ verbose(env, "bpf verifier is misconfigured\n"); +++ return -EINVAL; +++ } +++ +++ if (env->subprog_cnt > 1) { +++ /* when program has LD_ABS insn JITs and interpreter assume +++ * that r1 == ctx == skb which is not the case for callees +++ * that can have arbitrary arguments. It's problematic +++ * for main prog as well since JITs would need to analyze +++ * all functions in order to make proper register save/restore +++ * decisions in the main prog. Hence disallow LD_ABS with calls +++ */ +++ verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n"); ++ return -EINVAL; ++ } ++ ++ if (insn->dst_reg != BPF_REG_0 || insn->off != 0 || ++ BPF_SIZE(insn->code) == BPF_DW || ++ (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) { ++- verbose("BPF_LD_ABS uses reserved fields\n"); +++ verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n"); ++ return -EINVAL; ++ } ++ ++ /* check whether implicit source operand (register R6) is readable */ ++- err = check_reg_arg(regs, BPF_REG_6, SRC_OP); +++ err = check_reg_arg(env, ctx_reg, SRC_OP); ++ if (err) ++ return err; ++ ++- if (regs[BPF_REG_6].type != PTR_TO_CTX) { ++- verbose("at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); +++ /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as +++ * gen_ld_abs() may terminate the program at runtime, leading to +++ * reference leak. +++ */ +++ err = check_reference_leak(env); +++ if (err) { +++ verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n"); +++ return err; +++ } +++ +++ if (env->cur_state->active_spin_lock) { +++ verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n"); +++ return -EINVAL; +++ } +++ +++ if (regs[ctx_reg].type != PTR_TO_CTX) { +++ verbose(env, +++ "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); ++ return -EINVAL; ++ } ++ ++ if (mode == BPF_IND) { ++ /* check explicit source operand */ ++- err = check_reg_arg(regs, insn->src_reg, SRC_OP); +++ err = check_reg_arg(env, insn->src_reg, SRC_OP); ++ if (err) ++ return err; ++ } ++ +++ err = check_ctx_reg(env, &regs[ctx_reg], ctx_reg); +++ if (err < 0) +++ return err; +++ ++ /* reset caller saved regs to unreadable */ ++ for (i = 0; i < CALLER_SAVED_REGS; i++) { ++- reg = regs + caller_saved[i]; ++- reg->type = NOT_INIT; ++- reg->imm = 0; +++ mark_reg_not_init(env, regs, caller_saved[i]); +++ check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); ++ } ++ ++ /* mark destination R0 register as readable, since it contains ++- * the value fetched from the packet +++ * the value fetched from the packet. +++ * Already marked as written above. ++ */ ++- regs[BPF_REG_0].type = UNKNOWN_VALUE; +++ mark_reg_unknown(env, regs, BPF_REG_0); +++ /* ld_abs load up to 32-bit skb data.
*/ +++ regs[BPF_REG_0].subreg_def = env->insn_idx + 1; +++ return 0; +++} +++ +++static int check_return_code(struct bpf_verifier_env *env) +++{ +++ struct tnum enforce_attach_type_range = tnum_unknown; +++ struct bpf_reg_state *reg; +++ struct tnum range = tnum_range(0, 1); +++ +++ switch (env->prog->type) { +++ case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: +++ if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG || +++ env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG) +++ range = tnum_range(1, 1); +++ break; +++ case BPF_PROG_TYPE_CGROUP_SKB: +++ if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) { +++ range = tnum_range(0, 3); +++ enforce_attach_type_range = tnum_range(2, 3); +++ } +++ break; +++ case BPF_PROG_TYPE_CGROUP_SOCK: +++ case BPF_PROG_TYPE_SOCK_OPS: +++ case BPF_PROG_TYPE_CGROUP_DEVICE: +++ case BPF_PROG_TYPE_CGROUP_SYSCTL: +++ case BPF_PROG_TYPE_CGROUP_SOCKOPT: +++ break; +++ default: +++ return 0; +++ } +++ +++ reg = cur_regs(env) + BPF_REG_0; +++ if (reg->type != SCALAR_VALUE) { +++ verbose(env, "At program exit the register R0 is not a known value (%s)\n", +++ reg_type_str[reg->type]); +++ return -EINVAL; +++ } +++ +++ if (!tnum_in(range, reg->var_off)) { +++ char tn_buf[48]; +++ +++ verbose(env, "At program exit the register R0 "); +++ if (!tnum_is_unknown(reg->var_off)) { +++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); +++ verbose(env, "has value %s", tn_buf); +++ } else { +++ verbose(env, "has unknown scalar value"); +++ } +++ tnum_strn(tn_buf, sizeof(tn_buf), range); +++ verbose(env, " should have been in %s\n", tn_buf); +++ return -EINVAL; +++ } +++ +++ if (!tnum_is_unknown(enforce_attach_type_range) && +++ tnum_in(enforce_attach_type_range, reg->var_off)) +++ env->prog->enforce_expected_attach_type = 1; ++ return 0; ++ } ++ ++@@ -1447,19 +6457,37 @@ enum { ++ BRANCH = 2, ++ }; ++ ++-#define STATE_LIST_MARK ((struct verifier_state_list *) -1L) +++static u32 state_htab_size(struct bpf_verifier_env *env) +++{ +++ return env->prog->len; +++} +++ +++static struct bpf_verifier_state_list **explored_state( +++ struct bpf_verifier_env *env, +++ int idx) +++{ +++ struct bpf_verifier_state *cur = env->cur_state; +++ struct bpf_func_state *state = cur->frame[cur->curframe]; +++ +++ return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)]; +++} ++ ++-static int *insn_stack; /* stack of insns to process */ ++-static int cur_stack; /* current stack index */ ++-static int *insn_state; +++static void init_explored_state(struct bpf_verifier_env *env, int idx) +++{ +++ env->insn_aux_data[idx].prune_point = true; +++} ++ ++ /* t, w, e - match pseudo-code above: ++ * t - index of current instruction ++ * w - next instruction ++ * e - edge ++ */ ++-static int push_insn(int t, int w, int e, struct verifier_env *env) +++static int push_insn(int t, int w, int e, struct bpf_verifier_env *env, +++ bool loop_ok) ++ { +++ int *insn_stack = env->cfg.insn_stack; +++ int *insn_state = env->cfg.insn_state; +++ ++ if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH)) ++ return 0; ++ ++@@ -1467,30 +6495,35 @@ static int push_insn(int t, int w, int e ++ return 0; ++ ++ if (w < 0 || w >= env->prog->len) { ++- verbose("jump out of range from insn %d to %d\n", t, w); +++ verbose_linfo(env, t, "%d: ", t); +++ verbose(env, "jump out of range from insn %d to %d\n", t, w); ++ return -EINVAL; ++ } ++ ++ if (e == BRANCH) ++ /* mark branch target for state pruning */ ++- env->explored_states[w] = STATE_LIST_MARK; +++ init_explored_state(env, w); ++ 
++ if (insn_state[w] == 0) { ++ /* tree-edge */ ++ insn_state[t] = DISCOVERED | e; ++ insn_state[w] = DISCOVERED; ++- if (cur_stack >= env->prog->len) +++ if (env->cfg.cur_stack >= env->prog->len) ++ return -E2BIG; ++- insn_stack[cur_stack++] = w; +++ insn_stack[env->cfg.cur_stack++] = w; ++ return 1; ++ } else if ((insn_state[w] & 0xF0) == DISCOVERED) { ++- verbose("back-edge from insn %d to %d\n", t, w); +++ if (loop_ok && env->allow_ptr_leaks) +++ return 0; +++ verbose_linfo(env, t, "%d: ", t); +++ verbose_linfo(env, w, "%d: ", w); +++ verbose(env, "back-edge from insn %d to %d\n", t, w); ++ return -EINVAL; ++ } else if (insn_state[w] == EXPLORED) { ++ /* forward- or cross-edge */ ++ insn_state[t] = DISCOVERED | e; ++ } else { ++- verbose("insn state internal bug\n"); +++ verbose(env, "insn state internal bug\n"); ++ return -EFAULT; ++ } ++ return 0; ++@@ -1499,43 +6532,56 @@ static int push_insn(int t, int w, int e ++ /* non-recursive depth-first-search to detect loops in BPF program ++ * loop == back-edge in directed graph ++ */ ++-static int check_cfg(struct verifier_env *env) +++static int check_cfg(struct bpf_verifier_env *env) ++ { ++ struct bpf_insn *insns = env->prog->insnsi; ++ int insn_cnt = env->prog->len; +++ int *insn_stack, *insn_state; ++ int ret = 0; ++ int i, t; ++ ++- insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL); +++ insn_state = env->cfg.insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL); ++ if (!insn_state) ++ return -ENOMEM; ++ ++- insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL); +++ insn_stack = env->cfg.insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL); ++ if (!insn_stack) { ++- kfree(insn_state); +++ kvfree(insn_state); ++ return -ENOMEM; ++ } ++ ++ insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */ ++ insn_stack[0] = 0; /* 0 is the first instruction */ ++- cur_stack = 1; +++ env->cfg.cur_stack = 1; ++ ++ peek_stack: ++- if (cur_stack == 0) +++ if (env->cfg.cur_stack == 0) ++ goto check_state; ++- t = insn_stack[cur_stack - 1]; +++ t = insn_stack[env->cfg.cur_stack - 1]; ++ ++- if (BPF_CLASS(insns[t].code) == BPF_JMP) { +++ if (BPF_CLASS(insns[t].code) == BPF_JMP || +++ BPF_CLASS(insns[t].code) == BPF_JMP32) { ++ u8 opcode = BPF_OP(insns[t].code); ++ ++ if (opcode == BPF_EXIT) { ++ goto mark_explored; ++ } else if (opcode == BPF_CALL) { ++- ret = push_insn(t, t + 1, FALLTHROUGH, env); +++ ret = push_insn(t, t + 1, FALLTHROUGH, env, false); ++ if (ret == 1) ++ goto peek_stack; ++ else if (ret < 0) ++ goto err_free; +++ if (t + 1 < insn_cnt) +++ init_explored_state(env, t + 1); +++ if (insns[t].src_reg == BPF_PSEUDO_CALL) { +++ init_explored_state(env, t); +++ ret = push_insn(t, t + insns[t].imm + 1, BRANCH, +++ env, false); +++ if (ret == 1) +++ goto peek_stack; +++ else if (ret < 0) +++ goto err_free; +++ } ++ } else if (opcode == BPF_JA) { ++ if (BPF_SRC(insns[t].code) != BPF_K) { ++ ret = -EINVAL; ++@@ -1543,25 +6589,31 @@ peek_stack: ++ } ++ /* unconditional jump with single edge */ ++ ret = push_insn(t, t + insns[t].off + 1, ++- FALLTHROUGH, env); +++ FALLTHROUGH, env, true); ++ if (ret == 1) ++ goto peek_stack; ++ else if (ret < 0) ++ goto err_free; +++ /* unconditional jmp is not a good pruning point, +++ * but it's marked, since backtracking needs +++ * to record jmp history in is_state_visited(). 
+++ */ +++ init_explored_state(env, t + insns[t].off + 1); ++ /* tell verifier to check for equivalent states ++ * after every call and jump ++ */ ++ if (t + 1 < insn_cnt) ++- env->explored_states[t + 1] = STATE_LIST_MARK; +++ init_explored_state(env, t + 1); ++ } else { ++ /* conditional jump with two edges */ ++- ret = push_insn(t, t + 1, FALLTHROUGH, env); +++ init_explored_state(env, t); +++ ret = push_insn(t, t + 1, FALLTHROUGH, env, true); ++ if (ret == 1) ++ goto peek_stack; ++ else if (ret < 0) ++ goto err_free; ++ ++- ret = push_insn(t, t + insns[t].off + 1, BRANCH, env); +++ ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true); ++ if (ret == 1) ++ goto peek_stack; ++ else if (ret < 0) ++@@ -1571,7 +6623,7 @@ peek_stack: ++ /* all other non-branch instructions with single ++ * fall-through edge ++ */ ++- ret = push_insn(t, t + 1, FALLTHROUGH, env); +++ ret = push_insn(t, t + 1, FALLTHROUGH, env, false); ++ if (ret == 1) ++ goto peek_stack; ++ else if (ret < 0) ++@@ -1580,8 +6632,8 @@ peek_stack: ++ ++ mark_explored: ++ insn_state[t] = EXPLORED; ++- if (cur_stack-- <= 0) { ++- verbose("pop stack internal bug\n"); +++ if (env->cfg.cur_stack-- <= 0) { +++ verbose(env, "pop stack internal bug\n"); ++ ret = -EFAULT; ++ goto err_free; ++ } ++@@ -1590,7 +6642,7 @@ mark_explored: ++ check_state: ++ for (i = 0; i < insn_cnt; i++) { ++ if (insn_state[i] != EXPLORED) { ++- verbose("unreachable insn %d\n", i); +++ verbose(env, "unreachable insn %d\n", i); ++ ret = -EINVAL; ++ goto err_free; ++ } ++@@ -1598,11 +6650,616 @@ check_state: ++ ret = 0; /* cfg looks good */ ++ ++ err_free: ++- kfree(insn_state); ++- kfree(insn_stack); +++ kvfree(insn_state); +++ kvfree(insn_stack); +++ env->cfg.insn_state = env->cfg.insn_stack = NULL; ++ return ret; ++ } ++ +++/* The minimum supported BTF func info size */ +++#define MIN_BPF_FUNCINFO_SIZE 8 +++#define MAX_FUNCINFO_REC_SIZE 252 +++ +++static int check_btf_func(struct bpf_verifier_env *env, +++ const union bpf_attr *attr, +++ union bpf_attr __user *uattr) +++{ +++ u32 i, nfuncs, urec_size, min_size; +++ u32 krec_size = sizeof(struct bpf_func_info); +++ struct bpf_func_info *krecord; +++ const struct btf_type *type; +++ struct bpf_prog *prog; +++ const struct btf *btf; +++ void __user *urecord; +++ u32 prev_offset = 0; +++ int ret = 0; +++ +++ nfuncs = attr->func_info_cnt; +++ if (!nfuncs) +++ return 0; +++ +++ if (nfuncs != env->subprog_cnt) { +++ verbose(env, "number of funcs in func_info doesn't match number of subprogs\n"); +++ return -EINVAL; +++ } +++ +++ urec_size = attr->func_info_rec_size; +++ if (urec_size < MIN_BPF_FUNCINFO_SIZE || +++ urec_size > MAX_FUNCINFO_REC_SIZE || +++ urec_size % sizeof(u32)) { +++ verbose(env, "invalid func info rec size %u\n", urec_size); +++ return -EINVAL; +++ } +++ +++ prog = env->prog; +++ btf = prog->aux->btf; +++ +++ urecord = u64_to_user_ptr(attr->func_info); +++ min_size = min_t(u32, krec_size, urec_size); +++ +++ krecord = kcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN); +++ if (!krecord) +++ return -ENOMEM; +++ +++ for (i = 0; i < nfuncs; i++) { +++ ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size); +++ if (ret) { +++ if (ret == -E2BIG) { +++ verbose(env, "nonzero tailing record in func info"); +++ /* set the size kernel expects so loader can zero +++ * out the rest of the record. 
+++ */ +++ if (put_user(min_size, &uattr->func_info_rec_size)) +++ ret = -EFAULT; +++ } +++ goto err_free; +++ } +++ +++ if (copy_from_user(&krecord[i], urecord, min_size)) { +++ ret = -EFAULT; +++ goto err_free; +++ } +++ +++ /* check insn_off */ +++ if (i == 0) { +++ if (krecord[i].insn_off) { +++ verbose(env, +++ "nonzero insn_off %u for the first func info record", +++ krecord[i].insn_off); +++ ret = -EINVAL; +++ goto err_free; +++ } +++ } else if (krecord[i].insn_off <= prev_offset) { +++ verbose(env, +++ "same or smaller insn offset (%u) than previous func info record (%u)", +++ krecord[i].insn_off, prev_offset); +++ ret = -EINVAL; +++ goto err_free; +++ } +++ +++ if (env->subprog_info[i].start != krecord[i].insn_off) { +++ verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n"); +++ ret = -EINVAL; +++ goto err_free; +++ } +++ +++ /* check type_id */ +++ type = btf_type_by_id(btf, krecord[i].type_id); +++ if (!type || BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) { +++ verbose(env, "invalid type id %d in func info", +++ krecord[i].type_id); +++ ret = -EINVAL; +++ goto err_free; +++ } +++ +++ prev_offset = krecord[i].insn_off; +++ urecord += urec_size; +++ } +++ +++ prog->aux->func_info = krecord; +++ prog->aux->func_info_cnt = nfuncs; +++ return 0; +++ +++err_free: +++ kvfree(krecord); +++ return ret; +++} +++ +++static void adjust_btf_func(struct bpf_verifier_env *env) +++{ +++ int i; +++ +++ if (!env->prog->aux->func_info) +++ return; +++ +++ for (i = 0; i < env->subprog_cnt; i++) +++ env->prog->aux->func_info[i].insn_off = env->subprog_info[i].start; +++} +++ +++#define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \ +++ sizeof(((struct bpf_line_info *)(0))->line_col)) +++#define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE +++ +++static int check_btf_line(struct bpf_verifier_env *env, +++ const union bpf_attr *attr, +++ union bpf_attr __user *uattr) +++{ +++ u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0; +++ struct bpf_subprog_info *sub; +++ struct bpf_line_info *linfo; +++ struct bpf_prog *prog; +++ const struct btf *btf; +++ void __user *ulinfo; +++ int err; +++ +++ nr_linfo = attr->line_info_cnt; +++ if (!nr_linfo) +++ return 0; +++ +++ rec_size = attr->line_info_rec_size; +++ if (rec_size < MIN_BPF_LINEINFO_SIZE || +++ rec_size > MAX_LINEINFO_REC_SIZE || +++ rec_size & (sizeof(u32) - 1)) +++ return -EINVAL; +++ +++ /* Need to zero it in case the userspace may +++ * pass in a smaller bpf_line_info object. +++ */ +++ linfo = kcalloc(nr_linfo, sizeof(struct bpf_line_info), +++ GFP_KERNEL | __GFP_NOWARN); +++ if (!linfo) +++ return -ENOMEM; +++ +++ prog = env->prog; +++ btf = prog->aux->btf; +++ +++ s = 0; +++ sub = env->subprog_info; +++ ulinfo = u64_to_user_ptr(attr->line_info); +++ expected_size = sizeof(struct bpf_line_info); +++ ncopy = min_t(u32, expected_size, rec_size); +++ for (i = 0; i < nr_linfo; i++) { +++ err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size); +++ if (err) { +++ if (err == -E2BIG) { +++ verbose(env, "nonzero tailing record in line_info"); +++ if (put_user(expected_size, +++ &uattr->line_info_rec_size)) +++ err = -EFAULT; +++ } +++ goto err_free; +++ } +++ +++ if (copy_from_user(&linfo[i], ulinfo, ncopy)) { +++ err = -EFAULT; +++ goto err_free; +++ } +++ +++ /* +++ * Check insn_off to ensure +++ * 1) strictly increasing AND +++ * 2) bounded by prog->len +++ * +++ * The linfo[0].insn_off == 0 check logically falls into +++ * the later "missing bpf_line_info for func..." 
case +++ * because the first linfo[0].insn_off must be the +++ * first sub also and the first sub must have +++ * subprog_info[0].start == 0. +++ */ +++ if ((i && linfo[i].insn_off <= prev_offset) || +++ linfo[i].insn_off >= prog->len) { +++ verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n", +++ i, linfo[i].insn_off, prev_offset, +++ prog->len); +++ err = -EINVAL; +++ goto err_free; +++ } +++ +++ if (!prog->insnsi[linfo[i].insn_off].code) { +++ verbose(env, +++ "Invalid insn code at line_info[%u].insn_off\n", +++ i); +++ err = -EINVAL; +++ goto err_free; +++ } +++ +++ if (!btf_name_by_offset(btf, linfo[i].line_off) || +++ !btf_name_by_offset(btf, linfo[i].file_name_off)) { +++ verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i); +++ err = -EINVAL; +++ goto err_free; +++ } +++ +++ if (s != env->subprog_cnt) { +++ if (linfo[i].insn_off == sub[s].start) { +++ sub[s].linfo_idx = i; +++ s++; +++ } else if (sub[s].start < linfo[i].insn_off) { +++ verbose(env, "missing bpf_line_info for func#%u\n", s); +++ err = -EINVAL; +++ goto err_free; +++ } +++ } +++ +++ prev_offset = linfo[i].insn_off; +++ ulinfo += rec_size; +++ } +++ +++ if (s != env->subprog_cnt) { +++ verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n", +++ env->subprog_cnt - s, s); +++ err = -EINVAL; +++ goto err_free; +++ } +++ +++ prog->aux->linfo = linfo; +++ prog->aux->nr_linfo = nr_linfo; +++ +++ return 0; +++ +++err_free: +++ kvfree(linfo); +++ return err; +++} +++ +++static int check_btf_info(struct bpf_verifier_env *env, +++ const union bpf_attr *attr, +++ union bpf_attr __user *uattr) +++{ +++ struct btf *btf; +++ int err; +++ +++ if (!attr->func_info_cnt && !attr->line_info_cnt) +++ return 0; +++ +++ btf = btf_get_by_fd(attr->prog_btf_fd); +++ if (IS_ERR(btf)) +++ return PTR_ERR(btf); +++ env->prog->aux->btf = btf; +++ +++ err = check_btf_func(env, attr, uattr); +++ if (err) +++ return err; +++ +++ err = check_btf_line(env, attr, uattr); +++ if (err) +++ return err; +++ +++ return 0; +++} +++ +++/* check %cur's range satisfies %old's */ +++static bool range_within(struct bpf_reg_state *old, +++ struct bpf_reg_state *cur) +++{ +++ return old->umin_value <= cur->umin_value && +++ old->umax_value >= cur->umax_value && +++ old->smin_value <= cur->smin_value && +++ old->smax_value >= cur->smax_value; +++} +++ +++/* Maximum number of register states that can exist at once */ +++#define ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) +++struct idpair { +++ u32 old; +++ u32 cur; +++}; +++ +++/* If in the old state two registers had the same id, then they need to have +++ * the same id in the new state as well. But that id could be different from +++ * the old state, so we need to track the mapping from old to new ids. +++ * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent +++ * regs with old id 5 must also have new id 9 for the new state to be safe. But +++ * regs with a different old id could still have new id 9, we don't care about +++ * that. +++ * So we look through our idmap to see if this old id has been seen before. If +++ * so, we require the new id to match; otherwise, we add the id pair to the map. 
+++ */ +++static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap) +++{ +++ unsigned int i; +++ +++ for (i = 0; i < ID_MAP_SIZE; i++) { +++ if (!idmap[i].old) { +++ /* Reached an empty slot; haven't seen this id before */ +++ idmap[i].old = old_id; +++ idmap[i].cur = cur_id; +++ return true; +++ } +++ if (idmap[i].old == old_id) +++ return idmap[i].cur == cur_id; +++ } +++ /* We ran out of idmap slots, which should be impossible */ +++ WARN_ON_ONCE(1); +++ return false; +++} +++ +++static void clean_func_state(struct bpf_verifier_env *env, +++ struct bpf_func_state *st) +++{ +++ enum bpf_reg_liveness live; +++ int i, j; +++ +++ for (i = 0; i < BPF_REG_FP; i++) { +++ live = st->regs[i].live; +++ /* liveness must not touch this register anymore */ +++ st->regs[i].live |= REG_LIVE_DONE; +++ if (!(live & REG_LIVE_READ)) +++ /* since the register is unused, clear its state +++ * to make further comparison simpler +++ */ +++ __mark_reg_not_init(env, &st->regs[i]); +++ } +++ +++ for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) { +++ live = st->stack[i].spilled_ptr.live; +++ /* liveness must not touch this stack slot anymore */ +++ st->stack[i].spilled_ptr.live |= REG_LIVE_DONE; +++ if (!(live & REG_LIVE_READ)) { +++ __mark_reg_not_init(env, &st->stack[i].spilled_ptr); +++ for (j = 0; j < BPF_REG_SIZE; j++) +++ st->stack[i].slot_type[j] = STACK_INVALID; +++ } +++ } +++} +++ +++static void clean_verifier_state(struct bpf_verifier_env *env, +++ struct bpf_verifier_state *st) +++{ +++ int i; +++ +++ if (st->frame[0]->regs[0].live & REG_LIVE_DONE) +++ /* all regs in this state in all frames were already marked */ +++ return; +++ +++ for (i = 0; i <= st->curframe; i++) +++ clean_func_state(env, st->frame[i]); +++} +++ +++/* the parentage chains form a tree. +++ * the verifier states are added to state lists at given insn and +++ * pushed into state stack for future exploration. +++ * when the verifier reaches bpf_exit insn some of the verifer states +++ * stored in the state lists have their final liveness state already, +++ * but a lot of states will get revised from liveness point of view when +++ * the verifier explores other branches. +++ * Example: +++ * 1: r0 = 1 +++ * 2: if r1 == 100 goto pc+1 +++ * 3: r0 = 2 +++ * 4: exit +++ * when the verifier reaches exit insn the register r0 in the state list of +++ * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch +++ * of insn 2 and goes exploring further. At the insn 4 it will walk the +++ * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ. +++ * +++ * Since the verifier pushes the branch states as it sees them while exploring +++ * the program the condition of walking the branch instruction for the second +++ * time means that all states below this branch were already explored and +++ * their final liveness markes are already propagated. +++ * Hence when the verifier completes the search of state list in is_state_visited() +++ * we can call this clean_live_states() function to mark all liveness states +++ * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state' +++ * will not be used. +++ * This function also clears the registers and stack for states that !READ +++ * to simplify state merging. +++ * +++ * Important note here that walking the same branch instruction in the callee +++ * doesn't meant that the states are DONE. 
The verifier has to compare +++ * the callsites +++ */ +++static void clean_live_states(struct bpf_verifier_env *env, int insn, +++ struct bpf_verifier_state *cur) +++{ +++ struct bpf_verifier_state_list *sl; +++ int i; +++ +++ sl = *explored_state(env, insn); +++ while (sl) { +++ if (sl->state.branches) +++ goto next; +++ if (sl->state.insn_idx != insn || +++ sl->state.curframe != cur->curframe) +++ goto next; +++ for (i = 0; i <= cur->curframe; i++) +++ if (sl->state.frame[i]->callsite != cur->frame[i]->callsite) +++ goto next; +++ clean_verifier_state(env, &sl->state); +++next: +++ sl = sl->next; +++ } +++} +++ +++/* Returns true if (rold safe implies rcur safe) */ +++static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, +++ struct idpair *idmap) +++{ +++ bool equal; +++ +++ if (!(rold->live & REG_LIVE_READ)) +++ /* explored state didn't use this */ +++ return true; +++ +++ equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0; +++ +++ if (rold->type == PTR_TO_STACK) +++ /* two stack pointers are equal only if they're pointing to +++ * the same stack frame, since fp-8 in foo != fp-8 in bar +++ */ +++ return equal && rold->frameno == rcur->frameno; +++ +++ if (equal) +++ return true; +++ +++ if (rold->type == NOT_INIT) +++ /* explored state can't have used this */ +++ return true; +++ if (rcur->type == NOT_INIT) +++ return false; +++ switch (rold->type) { +++ case SCALAR_VALUE: +++ if (rcur->type == SCALAR_VALUE) { +++ if (!rold->precise && !rcur->precise) +++ return true; +++ /* new val must satisfy old val knowledge */ +++ return range_within(rold, rcur) && +++ tnum_in(rold->var_off, rcur->var_off); +++ } else { +++ /* We're trying to use a pointer in place of a scalar. +++ * Even if the scalar was unbounded, this could lead to +++ * pointer leaks because scalars are allowed to leak +++ * while pointers are not. We could make this safe in +++ * special cases if root is calling us, but it's +++ * probably not worth the hassle. +++ */ +++ return false; +++ } +++ case PTR_TO_MAP_VALUE: +++ /* If the new min/max/var_off satisfy the old ones and +++ * everything else matches, we are OK. +++ * 'id' is not compared, since it's only used for maps with +++ * bpf_spin_lock inside map element and in such cases if +++ * the rest of the prog is valid for one map element then +++ * it's valid for all map elements regardless of the key +++ * used in bpf_map_lookup() +++ */ +++ return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && +++ range_within(rold, rcur) && +++ tnum_in(rold->var_off, rcur->var_off); +++ case PTR_TO_MAP_VALUE_OR_NULL: +++ /* a PTR_TO_MAP_VALUE could be safe to use as a +++ * PTR_TO_MAP_VALUE_OR_NULL into the same map. +++ * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL- +++ * checked, doing so could have affected others with the same +++ * id, and we can't check for that because we lost the id when +++ * we converted to a PTR_TO_MAP_VALUE. +++ */ +++ if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL) +++ return false; +++ if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id))) +++ return false; +++ /* Check our ids match any regs they're supposed to */ +++ return check_ids(rold->id, rcur->id, idmap); +++ case PTR_TO_PACKET_META: +++ case PTR_TO_PACKET: +++ if (rcur->type != rold->type) +++ return false; +++ /* We must have at least as much range as the old ptr +++ * did, so that any accesses which were safe before are +++ * still safe. 
This is true even if old range < old off, +++ * since someone could have accessed through (ptr - k), or +++ * even done ptr -= k in a register, to get a safe access. +++ */ +++ if (rold->range > rcur->range) +++ return false; +++ /* If the offsets don't match, we can't trust our alignment; +++ * nor can we be sure that we won't fall out of range. +++ */ +++ if (rold->off != rcur->off) +++ return false; +++ /* id relations must be preserved */ +++ if (rold->id && !check_ids(rold->id, rcur->id, idmap)) +++ return false; +++ /* new val must satisfy old val knowledge */ +++ return range_within(rold, rcur) && +++ tnum_in(rold->var_off, rcur->var_off); +++ case PTR_TO_CTX: +++ case CONST_PTR_TO_MAP: +++ case PTR_TO_PACKET_END: +++ case PTR_TO_FLOW_KEYS: +++ case PTR_TO_SOCKET: +++ case PTR_TO_SOCKET_OR_NULL: +++ case PTR_TO_SOCK_COMMON: +++ case PTR_TO_SOCK_COMMON_OR_NULL: +++ case PTR_TO_TCP_SOCK: +++ case PTR_TO_TCP_SOCK_OR_NULL: +++ case PTR_TO_XDP_SOCK: +++ /* Only valid matches are exact, which memcmp() above +++ * would have accepted +++ */ +++ default: +++ /* Don't know what's going on, just say it's not safe */ +++ return false; +++ } +++ +++ /* Shouldn't get here; if we do, say it's not safe */ +++ WARN_ON_ONCE(1); +++ return false; +++} +++ +++static bool stacksafe(struct bpf_func_state *old, +++ struct bpf_func_state *cur, +++ struct idpair *idmap) +++{ +++ int i, spi; +++ +++ /* walk slots of the explored stack and ignore any additional +++ * slots in the current stack, since explored(safe) state +++ * didn't use them +++ */ +++ for (i = 0; i < old->allocated_stack; i++) { +++ spi = i / BPF_REG_SIZE; +++ +++ if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) { +++ i += BPF_REG_SIZE - 1; +++ /* explored state didn't use this */ +++ continue; +++ } +++ +++ if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) +++ continue; +++ +++ /* explored stack has more populated slots than current stack +++ * and these slots were used +++ */ +++ if (i >= cur->allocated_stack) +++ return false; +++ +++ /* if old state was safe with misc data in the stack +++ * it will be safe with zero-initialized stack. +++ * The opposite is not true +++ */ +++ if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC && +++ cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO) +++ continue; +++ if (old->stack[spi].slot_type[i % BPF_REG_SIZE] != +++ cur->stack[spi].slot_type[i % BPF_REG_SIZE]) +++ /* Ex: old explored (safe) state has STACK_SPILL in +++ * this stack slot, but current has has STACK_MISC -> +++ * this verifier states are not equivalent, +++ * return false to continue verification of this path +++ */ +++ return false; +++ if (i % BPF_REG_SIZE) +++ continue; +++ if (old->stack[spi].slot_type[0] != STACK_SPILL) +++ continue; +++ if (!regsafe(&old->stack[spi].spilled_ptr, +++ &cur->stack[spi].spilled_ptr, +++ idmap)) +++ /* when explored and current stack slot are both storing +++ * spilled registers, check that stored pointers types +++ * are the same as well. +++ * Ex: explored safe path could have stored +++ * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8} +++ * but current path has stored: +++ * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16} +++ * such verifier states are not equivalent. 
+++ * return false to continue verification of this path +++ */ +++ return false; +++ } +++ return true; +++} +++ +++static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur) +++{ +++ if (old->acquired_refs != cur->acquired_refs) +++ return false; +++ return !memcmp(old->refs, cur->refs, +++ sizeof(*old->refs) * old->acquired_refs); +++} +++ ++ /* compare two verifier states ++ * ++ * all states stored in state_list are known to be valid, since ++@@ -1629,165 +7286,562 @@ err_free: ++ * whereas register type in current state is meaningful, it means that ++ * the current state will reach 'bpf_exit' instruction safely ++ */ ++-static bool states_equal(struct verifier_state *old, struct verifier_state *cur) +++static bool func_states_equal(struct bpf_func_state *old, +++ struct bpf_func_state *cur) ++ { +++ struct idpair *idmap; +++ bool ret = false; ++ int i; ++ +++ idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL); +++ /* If we failed to allocate the idmap, just say it's not safe */ +++ if (!idmap) +++ return false; +++ ++ for (i = 0; i < MAX_BPF_REG; i++) { ++- if (memcmp(&old->regs[i], &cur->regs[i], ++- sizeof(old->regs[0])) != 0) { ++- if (old->regs[i].type == NOT_INIT || ++- (old->regs[i].type == UNKNOWN_VALUE && ++- cur->regs[i].type != NOT_INIT)) ++- continue; +++ if (!regsafe(&old->regs[i], &cur->regs[i], idmap)) +++ goto out_free; +++ } +++ +++ if (!stacksafe(old, cur, idmap)) +++ goto out_free; +++ +++ if (!refsafe(old, cur)) +++ goto out_free; +++ ret = true; +++out_free: +++ kfree(idmap); +++ return ret; +++} +++ +++static bool states_equal(struct bpf_verifier_env *env, +++ struct bpf_verifier_state *old, +++ struct bpf_verifier_state *cur) +++{ +++ int i; +++ +++ if (old->curframe != cur->curframe) +++ return false; +++ +++ /* Verification state from speculative execution simulation +++ * must never prune a non-speculative execution one. +++ */ +++ if (old->speculative && !cur->speculative) +++ return false; +++ +++ if (old->active_spin_lock != cur->active_spin_lock) +++ return false; +++ +++ /* for states to be equal callsites have to be the same +++ * and all frame states need to be equivalent +++ */ +++ for (i = 0; i <= old->curframe; i++) { +++ if (old->frame[i]->callsite != cur->frame[i]->callsite) +++ return false; +++ if (!func_states_equal(old->frame[i], cur->frame[i])) ++ return false; +++ } +++ return true; +++} +++ +++/* Return 0 if no propagation happened. Return negative error code if error +++ * happened. Otherwise, return the propagated bit. +++ */ +++static int propagate_liveness_reg(struct bpf_verifier_env *env, +++ struct bpf_reg_state *reg, +++ struct bpf_reg_state *parent_reg) +++{ +++ u8 parent_flag = parent_reg->live & REG_LIVE_READ; +++ u8 flag = reg->live & REG_LIVE_READ; +++ int err; +++ +++ /* When comes here, read flags of PARENT_REG or REG could be any of +++ * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need +++ * of propagation if PARENT_REG has strongest REG_LIVE_READ64. +++ */ +++ if (parent_flag == REG_LIVE_READ64 || +++ /* Or if there is no read flag from REG. */ +++ !flag || +++ /* Or if the read flag from REG is the same as PARENT_REG. */ +++ parent_flag == flag) +++ return 0; +++ +++ err = mark_reg_read(env, reg, parent_reg, flag); +++ if (err) +++ return err; +++ +++ return flag; +++} +++ +++/* A write screens off any subsequent reads; but write marks come from the +++ * straight-line code between a state and its parent. 
When we arrive at an +++ * equivalent state (jump target or such) we didn't arrive by the straight-line +++ * code, so read marks in the state must propagate to the parent regardless +++ * of the state's write marks. That's what 'parent == state->parent' comparison +++ * in mark_reg_read() is for. +++ */ +++static int propagate_liveness(struct bpf_verifier_env *env, +++ const struct bpf_verifier_state *vstate, +++ struct bpf_verifier_state *vparent) +++{ +++ struct bpf_reg_state *state_reg, *parent_reg; +++ struct bpf_func_state *state, *parent; +++ int i, frame, err = 0; +++ +++ if (vparent->curframe != vstate->curframe) { +++ WARN(1, "propagate_live: parent frame %d current frame %d\n", +++ vparent->curframe, vstate->curframe); +++ return -EFAULT; +++ } +++ /* Propagate read liveness of registers... */ +++ BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); +++ for (frame = 0; frame <= vstate->curframe; frame++) { +++ parent = vparent->frame[frame]; +++ state = vstate->frame[frame]; +++ parent_reg = parent->regs; +++ state_reg = state->regs; +++ /* We don't need to worry about FP liveness, it's read-only */ +++ for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) { +++ err = propagate_liveness_reg(env, &state_reg[i], +++ &parent_reg[i]); +++ if (err < 0) +++ return err; +++ if (err == REG_LIVE_READ64) +++ mark_insn_zext(env, &parent_reg[i]); +++ } +++ +++ /* Propagate stack slots. */ +++ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE && +++ i < parent->allocated_stack / BPF_REG_SIZE; i++) { +++ parent_reg = &parent->stack[i].spilled_ptr; +++ state_reg = &state->stack[i].spilled_ptr; +++ err = propagate_liveness_reg(env, state_reg, +++ parent_reg); +++ if (err < 0) +++ return err; ++ } ++ } +++ return 0; +++} ++ ++- for (i = 0; i < MAX_BPF_STACK; i++) { ++- if (old->stack_slot_type[i] == STACK_INVALID) +++/* find precise scalars in the previous equivalent state and +++ * propagate them into the current state +++ */ +++static int propagate_precision(struct bpf_verifier_env *env, +++ const struct bpf_verifier_state *old) +++{ +++ struct bpf_reg_state *state_reg; +++ struct bpf_func_state *state; +++ int i, err = 0; +++ +++ state = old->frame[old->curframe]; +++ state_reg = state->regs; +++ for (i = 0; i < BPF_REG_FP; i++, state_reg++) { +++ if (state_reg->type != SCALAR_VALUE || +++ !state_reg->precise) ++ continue; ++- if (old->stack_slot_type[i] != cur->stack_slot_type[i]) ++- /* Ex: old explored (safe) state has STACK_SPILL in ++- * this stack slot, but current has has STACK_MISC -> ++- * this verifier states are not equivalent, ++- * return false to continue verification of this path ++- */ ++- return false; ++- if (i % BPF_REG_SIZE) +++ if (env->log.level & BPF_LOG_LEVEL2) +++ verbose(env, "propagating r%d\n", i); +++ err = mark_chain_precision(env, i); +++ if (err < 0) +++ return err; +++ } +++ +++ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { +++ if (state->stack[i].slot_type[0] != STACK_SPILL) ++ continue; ++- if (memcmp(&old->spilled_regs[i / BPF_REG_SIZE], ++- &cur->spilled_regs[i / BPF_REG_SIZE], ++- sizeof(old->spilled_regs[0]))) ++- /* when explored and current stack slot types are ++- * the same, check that stored pointers types ++- * are the same as well. ++- * Ex: explored safe path could have stored ++- * (struct reg_state) {.type = PTR_TO_STACK, .imm = -8} ++- * but current path has stored: ++- * (struct reg_state) {.type = PTR_TO_STACK, .imm = -16} ++- * such verifier states are not equivalent. 
++- * return false to continue verification of this path ++- */ ++- return false; ++- else +++ state_reg = &state->stack[i].spilled_ptr; +++ if (state_reg->type != SCALAR_VALUE || +++ !state_reg->precise) ++ continue; +++ if (env->log.level & BPF_LOG_LEVEL2) +++ verbose(env, "propagating fp%d\n", +++ (-i - 1) * BPF_REG_SIZE); +++ err = mark_chain_precision_stack(env, i); +++ if (err < 0) +++ return err; ++ } +++ return 0; +++} +++ +++static bool states_maybe_looping(struct bpf_verifier_state *old, +++ struct bpf_verifier_state *cur) +++{ +++ struct bpf_func_state *fold, *fcur; +++ int i, fr = cur->curframe; +++ +++ if (old->curframe != fr) +++ return false; +++ +++ fold = old->frame[fr]; +++ fcur = cur->frame[fr]; +++ for (i = 0; i < MAX_BPF_REG; i++) +++ if (memcmp(&fold->regs[i], &fcur->regs[i], +++ offsetof(struct bpf_reg_state, parent))) +++ return false; ++ return true; ++ } ++ ++-static int is_state_visited(struct verifier_env *env, int insn_idx) +++ +++static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) ++ { ++- struct verifier_state_list *new_sl; ++- struct verifier_state_list *sl; +++ struct bpf_verifier_state_list *new_sl; +++ struct bpf_verifier_state_list *sl, **pprev; +++ struct bpf_verifier_state *cur = env->cur_state, *new; +++ int i, j, err, states_cnt = 0; +++ bool add_new_state = env->test_state_freq ? true : false; ++ ++- sl = env->explored_states[insn_idx]; ++- if (!sl) +++ cur->last_insn_idx = env->prev_insn_idx; +++ if (!env->insn_aux_data[insn_idx].prune_point) ++ /* this 'insn_idx' instruction wasn't marked, so we will not ++ * be doing state search here ++ */ ++ return 0; ++ ++- while (sl != STATE_LIST_MARK) { ++- if (states_equal(&sl->state, &env->cur_state)) +++ /* bpf progs typically have pruning point every 4 instructions +++ * http://vger.kernel.org/bpfconf2019.html#session-1 +++ * Do not add new state for future pruning if the verifier hasn't seen +++ * at least 2 jumps and at least 8 instructions. +++ * This heuristics helps decrease 'total_states' and 'peak_states' metric. +++ * In tests that amounts to up to 50% reduction into total verifier +++ * memory consumption and 20% verifier time speedup. +++ */ +++ if (env->jmps_processed - env->prev_jmps_processed >= 2 && +++ env->insn_processed - env->prev_insn_processed >= 8) +++ add_new_state = true; +++ +++ pprev = explored_state(env, insn_idx); +++ sl = *pprev; +++ +++ clean_live_states(env, insn_idx, cur); +++ +++ while (sl) { +++ states_cnt++; +++ if (sl->state.insn_idx != insn_idx) +++ goto next; +++ if (sl->state.branches) { +++ if (states_maybe_looping(&sl->state, cur) && +++ states_equal(env, &sl->state, cur)) { +++ verbose_linfo(env, insn_idx, "; "); +++ verbose(env, "infinite loop detected at insn %d\n", insn_idx); +++ return -EINVAL; +++ } +++ /* if the verifier is processing a loop, avoid adding new state +++ * too often, since different loop iterations have distinct +++ * states and may not help future pruning. +++ * This threshold shouldn't be too low to make sure that +++ * a loop with large bound will be rejected quickly. +++ * The most abusive loop will be: +++ * r1 += 1 +++ * if r1 < 1000000 goto pc-2 +++ * 1M insn_procssed limit / 100 == 10k peak states. +++ * This threshold shouldn't be too high either, since states +++ * at the end of the loop are likely to be useful in pruning. 
+++ */ +++ if (env->jmps_processed - env->prev_jmps_processed < 20 && +++ env->insn_processed - env->prev_insn_processed < 100) +++ add_new_state = false; +++ goto miss; +++ } +++ if (states_equal(env, &sl->state, cur)) { +++ sl->hit_cnt++; ++ /* reached equivalent register/stack state, ++- * prune the search +++ * prune the search. +++ * Registers read by the continuation are read by us. +++ * If we have any write marks in env->cur_state, they +++ * will prevent corresponding reads in the continuation +++ * from reaching our parent (an explored_state). Our +++ * own state will get the read marks recorded, but +++ * they'll be immediately forgotten as we're pruning +++ * this state and will pop a new one. ++ */ ++- return 1; ++- sl = sl->next; ++- } +++ err = propagate_liveness(env, &sl->state, cur); ++ ++- /* there were no equivalent states, remember current one. ++- * technically the current state is not proven to be safe yet, ++- * but it will either reach bpf_exit (which means it's safe) or ++- * it will be rejected. Since there are no loops, we won't be ++- * seeing this 'insn_idx' instruction again on the way to bpf_exit +++ /* if previous state reached the exit with precision and +++ * current state is equivalent to it (except precsion marks) +++ * the precision needs to be propagated back in +++ * the current state. +++ */ +++ err = err ? : push_jmp_history(env, cur); +++ err = err ? : propagate_precision(env, &sl->state); +++ if (err) +++ return err; +++ return 1; +++ } +++miss: +++ /* when new state is not going to be added do not increase miss count. +++ * Otherwise several loop iterations will remove the state +++ * recorded earlier. The goal of these heuristics is to have +++ * states from some iterations of the loop (some in the beginning +++ * and some at the end) to help pruning. +++ */ +++ if (add_new_state) +++ sl->miss_cnt++; +++ /* heuristic to determine whether this state is beneficial +++ * to keep checking from state equivalence point of view. +++ * Higher numbers increase max_states_per_insn and verification time, +++ * but do not meaningfully decrease insn_processed. +++ */ +++ if (sl->miss_cnt > sl->hit_cnt * 3 + 3) { +++ /* the state is unlikely to be useful. Remove it to +++ * speed up verification +++ */ +++ *pprev = sl->next; +++ if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) { +++ u32 br = sl->state.branches; +++ +++ WARN_ONCE(br, +++ "BUG live_done but branches_to_explore %d\n", +++ br); +++ free_verifier_state(&sl->state, false); +++ kfree(sl); +++ env->peak_states--; +++ } else { +++ /* cannot free this state, since parentage chain may +++ * walk it later. Add it for free_list instead to +++ * be freed at the end of verification +++ */ +++ sl->next = env->free_list; +++ env->free_list = sl; +++ } +++ sl = *pprev; +++ continue; +++ } +++next: +++ pprev = &sl->next; +++ sl = *pprev; +++ } +++ +++ if (env->max_states_per_insn < states_cnt) +++ env->max_states_per_insn = states_cnt; +++ +++ if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) +++ return push_jmp_history(env, cur); +++ +++ if (!add_new_state) +++ return push_jmp_history(env, cur); +++ +++ /* There were no equivalent states, remember the current one. +++ * Technically the current state is not proven to be safe yet, +++ * but it will either reach outer most bpf_exit (which means it's safe) +++ * or it will be rejected. When there are no loops the verifier won't be +++ * seeing this tuple (frame[0].callsite, frame[1].callsite, .. 
insn_idx) +++ * again on the way to bpf_exit. +++ * When looping the sl->state.branches will be > 0 and this state +++ * will not be considered for equivalence until branches == 0. ++ */ ++- new_sl = kmalloc(sizeof(struct verifier_state_list), GFP_USER); +++ new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL); ++ if (!new_sl) ++ return -ENOMEM; +++ env->total_states++; +++ env->peak_states++; +++ env->prev_jmps_processed = env->jmps_processed; +++ env->prev_insn_processed = env->insn_processed; ++ ++ /* add new state to the head of linked list */ ++- memcpy(&new_sl->state, &env->cur_state, sizeof(env->cur_state)); ++- new_sl->next = env->explored_states[insn_idx]; ++- env->explored_states[insn_idx] = new_sl; +++ new = &new_sl->state; +++ err = copy_verifier_state(new, cur); +++ if (err) { +++ free_verifier_state(new, false); +++ kfree(new_sl); +++ return err; +++ } +++ new->insn_idx = insn_idx; +++ WARN_ONCE(new->branches != 1, +++ "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx); +++ +++ cur->parent = new; +++ cur->first_insn_idx = insn_idx; +++ clear_jmp_history(cur); +++ new_sl->next = *explored_state(env, insn_idx); +++ *explored_state(env, insn_idx) = new_sl; +++ /* connect new state to parentage chain. Current frame needs all +++ * registers connected. Only r6 - r9 of the callers are alive (pushed +++ * to the stack implicitly by JITs) so in callers' frames connect just +++ * r6 - r9 as an optimization. Callers will have r1 - r5 connected to +++ * the state of the call instruction (with WRITTEN set), and r0 comes +++ * from callee with its full parentage chain, anyway. +++ */ +++ /* clear write marks in current state: the writes we did are not writes +++ * our child did, so they don't screen off its reads from us. +++ * (There are no read marks in current state, because reads always mark +++ * their parent and current state never has children yet. Only +++ * explored_states can get read marks.) +++ */ +++ for (j = 0; j <= cur->curframe; j++) { +++ for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) +++ cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i]; +++ for (i = 0; i < BPF_REG_FP; i++) +++ cur->frame[j]->regs[i].live = REG_LIVE_NONE; +++ } +++ +++ /* all stack frames are accessible from callee, clear them all */ +++ for (j = 0; j <= cur->curframe; j++) { +++ struct bpf_func_state *frame = cur->frame[j]; +++ struct bpf_func_state *newframe = new->frame[j]; +++ +++ for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) { +++ frame->stack[i].spilled_ptr.live = REG_LIVE_NONE; +++ frame->stack[i].spilled_ptr.parent = +++ &newframe->stack[i].spilled_ptr; +++ } +++ } ++ return 0; ++ } ++ ++-static int do_check(struct verifier_env *env) +++/* Return true if it's OK to have the same insn return a different type. 
*/ +++static bool reg_type_mismatch_ok(enum bpf_reg_type type) ++ { ++- struct verifier_state *state = &env->cur_state; +++ switch (type) { +++ case PTR_TO_CTX: +++ case PTR_TO_SOCKET: +++ case PTR_TO_SOCKET_OR_NULL: +++ case PTR_TO_SOCK_COMMON: +++ case PTR_TO_SOCK_COMMON_OR_NULL: +++ case PTR_TO_TCP_SOCK: +++ case PTR_TO_TCP_SOCK_OR_NULL: +++ case PTR_TO_XDP_SOCK: +++ return false; +++ default: +++ return true; +++ } +++} +++ +++/* If an instruction was previously used with particular pointer types, then we +++ * need to be careful to avoid cases such as the below, where it may be ok +++ * for one branch accessing the pointer, but not ok for the other branch: +++ * +++ * R1 = sock_ptr +++ * goto X; +++ * ... +++ * R1 = some_other_valid_ptr; +++ * goto X; +++ * ... +++ * R2 = *(u32 *)(R1 + 0); +++ */ +++static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev) +++{ +++ return src != prev && (!reg_type_mismatch_ok(src) || +++ !reg_type_mismatch_ok(prev)); +++} +++ +++static int do_check(struct bpf_verifier_env *env) +++{ +++ struct bpf_verifier_state *state; ++ struct bpf_insn *insns = env->prog->insnsi; ++- struct reg_state *regs = state->regs; +++ struct bpf_reg_state *regs; ++ int insn_cnt = env->prog->len; ++- int insn_idx, prev_insn_idx = 0; ++- int insn_processed = 0; ++ bool do_print_state = false; +++ int prev_insn_idx = -1; +++ +++ env->prev_linfo = NULL; +++ +++ state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL); +++ if (!state) +++ return -ENOMEM; +++ state->curframe = 0; +++ state->speculative = false; +++ state->branches = 1; +++ state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); +++ if (!state->frame[0]) { +++ kfree(state); +++ return -ENOMEM; +++ } +++ env->cur_state = state; +++ init_func_state(env, state->frame[0], +++ BPF_MAIN_FUNC /* callsite */, +++ 0 /* frameno */, +++ 0 /* subprogno, zero == main subprog */); ++ ++- init_reg_state(regs); ++- insn_idx = 0; ++ for (;;) { ++ struct bpf_insn *insn; ++ u8 class; ++ int err; ++ ++- if (insn_idx >= insn_cnt) { ++- verbose("invalid insn idx %d insn_cnt %d\n", ++- insn_idx, insn_cnt); +++ env->prev_insn_idx = prev_insn_idx; +++ if (env->insn_idx >= insn_cnt) { +++ verbose(env, "invalid insn idx %d insn_cnt %d\n", +++ env->insn_idx, insn_cnt); ++ return -EFAULT; ++ } ++ ++- insn = &insns[insn_idx]; +++ insn = &insns[env->insn_idx]; ++ class = BPF_CLASS(insn->code); ++ ++- if (++insn_processed > 32768) { ++- verbose("BPF program is too large. Proccessed %d insn\n", ++- insn_processed); +++ if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { +++ verbose(env, +++ "BPF program is too large. Processed %d insn\n", +++ env->insn_processed); ++ return -E2BIG; ++ } ++ ++- err = is_state_visited(env, insn_idx); +++ err = is_state_visited(env, env->insn_idx); ++ if (err < 0) ++ return err; ++ if (err == 1) { ++ /* found equivalent state, can prune the search */ ++- if (log_level) { +++ if (env->log.level & BPF_LOG_LEVEL) { ++ if (do_print_state) ++- verbose("\nfrom %d to %d: safe\n", ++- prev_insn_idx, insn_idx); +++ verbose(env, "\nfrom %d to %d%s: safe\n", +++ env->prev_insn_idx, env->insn_idx, +++ env->cur_state->speculative ? 
+++ " (speculative execution)" : ""); ++ else ++- verbose("%d: safe\n", insn_idx); +++ verbose(env, "%d: safe\n", env->insn_idx); ++ } ++ goto process_bpf_exit; ++ } ++ ++- if (log_level && do_print_state) { ++- verbose("\nfrom %d to %d:", prev_insn_idx, insn_idx); ++- print_verifier_state(env); +++ if (signal_pending(current)) +++ return -EAGAIN; +++ +++ if (need_resched()) +++ cond_resched(); +++ +++ if (env->log.level & BPF_LOG_LEVEL2 || +++ (env->log.level & BPF_LOG_LEVEL && do_print_state)) { +++ if (env->log.level & BPF_LOG_LEVEL2) +++ verbose(env, "%d:", env->insn_idx); +++ else +++ verbose(env, "\nfrom %d to %d%s:", +++ env->prev_insn_idx, env->insn_idx, +++ env->cur_state->speculative ? +++ " (speculative execution)" : ""); +++ print_verifier_state(env, state->frame[state->curframe]); ++ do_print_state = false; ++ } ++ ++- if (log_level) { ++- verbose("%d: ", insn_idx); ++- print_bpf_insn(env, insn); +++ if (env->log.level & BPF_LOG_LEVEL) { +++ const struct bpf_insn_cbs cbs = { +++ .cb_print = verbose, +++ .private_data = env, +++ }; +++ +++ verbose_linfo(env, env->insn_idx, "; "); +++ verbose(env, "%d: ", env->insn_idx); +++ print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); ++ } ++ +++ regs = cur_regs(env); +++ sanitize_mark_insn_seen(env); +++ prev_insn_idx = env->insn_idx; +++ ++ if (class == BPF_ALU || class == BPF_ALU64) { ++ err = check_alu_op(env, insn); ++ if (err) ++ return err; ++ ++ } else if (class == BPF_LDX) { ++- enum bpf_reg_type src_reg_type; +++ enum bpf_reg_type *prev_src_type, src_reg_type; ++ ++ /* check for reserved fields is already done */ ++ ++ /* check src operand */ ++- err = check_reg_arg(regs, insn->src_reg, SRC_OP); +++ err = check_reg_arg(env, insn->src_reg, SRC_OP); ++ if (err) ++ return err; ++ ++- err = check_reg_arg(regs, insn->dst_reg, DST_OP_NO_MARK); +++ err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); ++ if (err) ++ return err; ++ ++@@ -1796,27 +7850,22 @@ static int do_check(struct verifier_env ++ /* check that memory (src_reg + off) is readable, ++ * the state of dst_reg will be updated by this func ++ */ ++- err = check_mem_access(env, insn->src_reg, insn->off, ++- BPF_SIZE(insn->code), BPF_READ, ++- insn->dst_reg); +++ err = check_mem_access(env, env->insn_idx, insn->src_reg, +++ insn->off, BPF_SIZE(insn->code), +++ BPF_READ, insn->dst_reg, false); ++ if (err) ++ return err; ++ ++- if (BPF_SIZE(insn->code) != BPF_W) { ++- insn_idx++; ++- continue; ++- } +++ prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type; ++ ++- if (insn->imm == 0) { +++ if (*prev_src_type == NOT_INIT) { ++ /* saw a valid insn ++ * dst_reg = *(u32 *)(src_reg + off) ++- * use reserved 'imm' field to mark this insn +++ * save type to validate intersecting paths ++ */ ++- insn->imm = src_reg_type; +++ *prev_src_type = src_reg_type; ++ ++- } else if (src_reg_type != insn->imm && ++- (src_reg_type == PTR_TO_CTX || ++- insn->imm == PTR_TO_CTX)) { +++ } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) { ++ /* ABuser program is trying to use the same insn ++ * dst_reg = *(u32*) (src_reg + off) ++ * with different pointer types: ++@@ -1824,79 +7873,98 @@ static int do_check(struct verifier_env ++ * src_reg == stack|map in some other branch. ++ * Reject it. 
++ */ ++- verbose("same insn cannot be used with different pointers\n"); +++ verbose(env, "same insn cannot be used with different pointers\n"); ++ return -EINVAL; ++ } ++ ++ } else if (class == BPF_STX) { ++- enum bpf_reg_type dst_reg_type; +++ enum bpf_reg_type *prev_dst_type, dst_reg_type; ++ ++ if (BPF_MODE(insn->code) == BPF_XADD) { ++- err = check_xadd(env, insn); +++ err = check_xadd(env, env->insn_idx, insn); ++ if (err) ++ return err; ++- insn_idx++; +++ env->insn_idx++; ++ continue; ++ } ++ ++ /* check src1 operand */ ++- err = check_reg_arg(regs, insn->src_reg, SRC_OP); +++ err = check_reg_arg(env, insn->src_reg, SRC_OP); ++ if (err) ++ return err; ++ /* check src2 operand */ ++- err = check_reg_arg(regs, insn->dst_reg, SRC_OP); +++ err = check_reg_arg(env, insn->dst_reg, SRC_OP); ++ if (err) ++ return err; ++ ++ dst_reg_type = regs[insn->dst_reg].type; ++ ++ /* check that memory (dst_reg + off) is writeable */ ++- err = check_mem_access(env, insn->dst_reg, insn->off, ++- BPF_SIZE(insn->code), BPF_WRITE, ++- insn->src_reg); +++ err = check_mem_access(env, env->insn_idx, insn->dst_reg, +++ insn->off, BPF_SIZE(insn->code), +++ BPF_WRITE, insn->src_reg, false); ++ if (err) ++ return err; ++ ++- if (insn->imm == 0) { ++- insn->imm = dst_reg_type; ++- } else if (dst_reg_type != insn->imm && ++- (dst_reg_type == PTR_TO_CTX || ++- insn->imm == PTR_TO_CTX)) { ++- verbose("same insn cannot be used with different pointers\n"); +++ prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type; +++ +++ if (*prev_dst_type == NOT_INIT) { +++ *prev_dst_type = dst_reg_type; +++ } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) { +++ verbose(env, "same insn cannot be used with different pointers\n"); ++ return -EINVAL; ++ } ++ ++ } else if (class == BPF_ST) { ++ if (BPF_MODE(insn->code) != BPF_MEM || ++ insn->src_reg != BPF_REG_0) { ++- verbose("BPF_ST uses reserved fields\n"); +++ verbose(env, "BPF_ST uses reserved fields\n"); ++ return -EINVAL; ++ } ++ /* check src operand */ ++- err = check_reg_arg(regs, insn->dst_reg, SRC_OP); +++ err = check_reg_arg(env, insn->dst_reg, SRC_OP); ++ if (err) ++ return err; ++ +++ if (is_ctx_reg(env, insn->dst_reg)) { +++ verbose(env, "BPF_ST stores into R%d %s is not allowed\n", +++ insn->dst_reg, +++ reg_type_str[reg_state(env, insn->dst_reg)->type]); +++ return -EACCES; +++ } +++ ++ /* check that memory (dst_reg + off) is writeable */ ++- err = check_mem_access(env, insn->dst_reg, insn->off, ++- BPF_SIZE(insn->code), BPF_WRITE, ++- -1); +++ err = check_mem_access(env, env->insn_idx, insn->dst_reg, +++ insn->off, BPF_SIZE(insn->code), +++ BPF_WRITE, -1, false); ++ if (err) ++ return err; ++ ++- } else if (class == BPF_JMP) { +++ } else if (class == BPF_JMP || class == BPF_JMP32) { ++ u8 opcode = BPF_OP(insn->code); ++ +++ env->jmps_processed++; ++ if (opcode == BPF_CALL) { ++ if (BPF_SRC(insn->code) != BPF_K || ++ insn->off != 0 || ++- insn->src_reg != BPF_REG_0 || ++- insn->dst_reg != BPF_REG_0) { ++- verbose("BPF_CALL uses reserved fields\n"); +++ (insn->src_reg != BPF_REG_0 && +++ insn->src_reg != BPF_PSEUDO_CALL) || +++ insn->dst_reg != BPF_REG_0 || +++ class == BPF_JMP32) { +++ verbose(env, "BPF_CALL uses reserved fields\n"); ++ return -EINVAL; ++ } ++ ++- err = check_call(env, insn->imm); +++ if (env->cur_state->active_spin_lock && +++ (insn->src_reg == BPF_PSEUDO_CALL || +++ insn->imm != BPF_FUNC_spin_unlock)) { +++ verbose(env, "function calls are not allowed while holding a lock\n"); +++ return -EINVAL; +++ } +++ if (insn->src_reg == 
BPF_PSEUDO_CALL) +++ err = check_func_call(env, insn, &env->insn_idx); +++ else +++ err = check_helper_call(env, insn->imm, env->insn_idx); ++ if (err) ++ return err; ++ ++@@ -1904,48 +7972,75 @@ static int do_check(struct verifier_env ++ if (BPF_SRC(insn->code) != BPF_K || ++ insn->imm != 0 || ++ insn->src_reg != BPF_REG_0 || ++- insn->dst_reg != BPF_REG_0) { ++- verbose("BPF_JA uses reserved fields\n"); +++ insn->dst_reg != BPF_REG_0 || +++ class == BPF_JMP32) { +++ verbose(env, "BPF_JA uses reserved fields\n"); ++ return -EINVAL; ++ } ++ ++- insn_idx += insn->off + 1; +++ env->insn_idx += insn->off + 1; ++ continue; ++ ++ } else if (opcode == BPF_EXIT) { ++ if (BPF_SRC(insn->code) != BPF_K || ++ insn->imm != 0 || ++ insn->src_reg != BPF_REG_0 || ++- insn->dst_reg != BPF_REG_0) { ++- verbose("BPF_EXIT uses reserved fields\n"); +++ insn->dst_reg != BPF_REG_0 || +++ class == BPF_JMP32) { +++ verbose(env, "BPF_EXIT uses reserved fields\n"); ++ return -EINVAL; ++ } ++ +++ if (env->cur_state->active_spin_lock) { +++ verbose(env, "bpf_spin_unlock is missing\n"); +++ return -EINVAL; +++ } +++ +++ if (state->curframe) { +++ /* exit from nested function */ +++ err = prepare_func_exit(env, &env->insn_idx); +++ if (err) +++ return err; +++ do_print_state = true; +++ continue; +++ } +++ +++ err = check_reference_leak(env); +++ if (err) +++ return err; +++ ++ /* eBPF calling convetion is such that R0 is used ++ * to return the value from eBPF program. ++ * Make sure that it's readable at this time ++ * of bpf_exit, which means that program wrote ++ * something into it earlier ++ */ ++- err = check_reg_arg(regs, BPF_REG_0, SRC_OP); +++ err = check_reg_arg(env, BPF_REG_0, SRC_OP); ++ if (err) ++ return err; ++ ++ if (is_pointer_value(env, BPF_REG_0)) { ++- verbose("R0 leaks addr as return value\n"); +++ verbose(env, "R0 leaks addr as return value\n"); ++ return -EACCES; ++ } ++ +++ err = check_return_code(env); +++ if (err) +++ return err; ++ process_bpf_exit: ++- insn_idx = pop_stack(env, &prev_insn_idx); ++- if (insn_idx < 0) { +++ update_branch_counts(env, env->cur_state); +++ err = pop_stack(env, &prev_insn_idx, +++ &env->insn_idx); +++ if (err < 0) { +++ if (err != -ENOENT) +++ return err; ++ break; ++ } else { ++ do_print_state = true; ++ continue; ++ } ++ } else { ++- err = check_cond_jmp_op(env, insn, &insn_idx); +++ err = check_cond_jmp_op(env, insn, &env->insn_idx); ++ if (err) ++ return err; ++ } ++@@ -1962,83 +8057,194 @@ process_bpf_exit: ++ if (err) ++ return err; ++ ++- insn_idx++; +++ env->insn_idx++; +++ sanitize_mark_insn_seen(env); ++ } else { ++- verbose("invalid BPF_LD mode\n"); +++ verbose(env, "invalid BPF_LD mode\n"); ++ return -EINVAL; ++ } ++ } else { ++- verbose("unknown insn class %d\n", class); +++ verbose(env, "unknown insn class %d\n", class); ++ return -EINVAL; ++ } ++ ++- insn_idx++; +++ env->insn_idx++; ++ } ++ +++ env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; ++ return 0; ++ } ++ +++static int check_map_prealloc(struct bpf_map *map) +++{ +++ return (map->map_type != BPF_MAP_TYPE_HASH && +++ map->map_type != BPF_MAP_TYPE_PERCPU_HASH && +++ map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) || +++ !(map->map_flags & BPF_F_NO_PREALLOC); +++} +++ +++static bool is_tracing_prog_type(enum bpf_prog_type type) +++{ +++ switch (type) { +++ case BPF_PROG_TYPE_KPROBE: +++ case BPF_PROG_TYPE_TRACEPOINT: +++ case BPF_PROG_TYPE_PERF_EVENT: +++ case BPF_PROG_TYPE_RAW_TRACEPOINT: +++ return true; +++ default: +++ return false; +++ } +++} +++ +++static int 
check_map_prog_compatibility(struct bpf_verifier_env *env, +++ struct bpf_map *map, +++ struct bpf_prog *prog) +++ +++{ +++ /* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use +++ * preallocated hash maps, since doing memory allocation +++ * in overflow_handler can crash depending on where nmi got +++ * triggered. +++ */ +++ if (prog->type == BPF_PROG_TYPE_PERF_EVENT) { +++ if (!check_map_prealloc(map)) { +++ verbose(env, "perf_event programs can only use preallocated hash map\n"); +++ return -EINVAL; +++ } +++ if (map->inner_map_meta && +++ !check_map_prealloc(map->inner_map_meta)) { +++ verbose(env, "perf_event programs can only use preallocated inner hash map\n"); +++ return -EINVAL; +++ } +++ } +++ +++ if ((is_tracing_prog_type(prog->type) || +++ prog->type == BPF_PROG_TYPE_SOCKET_FILTER) && +++ map_value_has_spin_lock(map)) { +++ verbose(env, "tracing progs cannot use bpf_spin_lock yet\n"); +++ return -EINVAL; +++ } +++ +++ return 0; +++} +++ +++static bool bpf_map_is_cgroup_storage(struct bpf_map *map) +++{ +++ return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE || +++ map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); +++} +++ ++ /* look for pseudo eBPF instructions that access map FDs and ++ * replace them with actual map pointers ++ */ ++-static int replace_map_fd_with_map_ptr(struct verifier_env *env) +++static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) ++ { ++ struct bpf_insn *insn = env->prog->insnsi; ++ int insn_cnt = env->prog->len; ++- int i, j; +++ int i, j, err; +++ +++ err = bpf_prog_calc_tag(env->prog); +++ if (err) +++ return err; ++ ++ for (i = 0; i < insn_cnt; i++, insn++) { ++ if (BPF_CLASS(insn->code) == BPF_LDX && ++ (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) { ++- verbose("BPF_LDX uses reserved fields\n"); +++ verbose(env, "BPF_LDX uses reserved fields\n"); ++ return -EINVAL; ++ } ++ ++ if (BPF_CLASS(insn->code) == BPF_STX && ++ ((BPF_MODE(insn->code) != BPF_MEM && ++ BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) { ++- verbose("BPF_STX uses reserved fields\n"); +++ verbose(env, "BPF_STX uses reserved fields\n"); ++ return -EINVAL; ++ } ++ ++ if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) { +++ struct bpf_insn_aux_data *aux; ++ struct bpf_map *map; ++ struct fd f; +++ u64 addr; ++ ++ if (i == insn_cnt - 1 || insn[1].code != 0 || ++ insn[1].dst_reg != 0 || insn[1].src_reg != 0 || ++ insn[1].off != 0) { ++- verbose("invalid bpf_ld_imm64 insn\n"); +++ verbose(env, "invalid bpf_ld_imm64 insn\n"); ++ return -EINVAL; ++ } ++ ++- if (insn->src_reg == 0) +++ if (insn[0].src_reg == 0) ++ /* valid generic load 64-bit imm */ ++ goto next_insn; ++ ++- if (insn->src_reg != BPF_PSEUDO_MAP_FD) { ++- verbose("unrecognized bpf_ld_imm64 insn\n"); +++ /* In final convert_pseudo_ld_imm64() step, this is +++ * converted into regular 64-bit imm load insn. 
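/*
 * Editorial sketch, not part of the backported diff: the kind of map that
 * check_map_prog_compatibility() above refuses to pair with a
 * BPF_PROG_TYPE_PERF_EVENT program - a hash map created with
 * BPF_F_NO_PREALLOC, because a perf_event program may run in NMI context
 * where on-demand element allocation is unsafe.  Userspace view only, error
 * handling omitted; create_no_prealloc_hash() is a hypothetical helper.
 */
#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int create_no_prealloc_hash(void)
{
        union bpf_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.map_type    = BPF_MAP_TYPE_HASH;
        attr.key_size    = sizeof(__u32);
        attr.value_size  = sizeof(__u64);
        attr.max_entries = 128;
        attr.map_flags   = BPF_F_NO_PREALLOC;   /* lazy element allocation */

        return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}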
+++ */ +++ if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD && +++ insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) || +++ (insn[0].src_reg == BPF_PSEUDO_MAP_FD && +++ insn[1].imm != 0)) { +++ verbose(env, +++ "unrecognized bpf_ld_imm64 insn\n"); ++ return -EINVAL; ++ } ++ ++- f = fdget(insn->imm); +++ f = fdget(insn[0].imm); ++ map = __bpf_map_get(f); ++ if (IS_ERR(map)) { ++- verbose("fd %d is not pointing to valid bpf_map\n", ++- insn->imm); +++ verbose(env, "fd %d is not pointing to valid bpf_map\n", +++ insn[0].imm); ++ return PTR_ERR(map); ++ } ++ ++- /* store map pointer inside BPF_LD_IMM64 instruction */ ++- insn[0].imm = (u32) (unsigned long) map; ++- insn[1].imm = ((u64) (unsigned long) map) >> 32; +++ err = check_map_prog_compatibility(env, map, env->prog); +++ if (err) { +++ fdput(f); +++ return err; +++ } +++ +++ aux = &env->insn_aux_data[i]; +++ if (insn->src_reg == BPF_PSEUDO_MAP_FD) { +++ addr = (unsigned long)map; +++ } else { +++ u32 off = insn[1].imm; +++ +++ if (off >= BPF_MAX_VAR_OFF) { +++ verbose(env, "direct value offset of %u is not allowed\n", off); +++ fdput(f); +++ return -EINVAL; +++ } +++ +++ if (!map->ops->map_direct_value_addr) { +++ verbose(env, "no direct value access support for this map type\n"); +++ fdput(f); +++ return -EINVAL; +++ } +++ +++ err = map->ops->map_direct_value_addr(map, &addr, off); +++ if (err) { +++ verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n", +++ map->value_size, off); +++ fdput(f); +++ return err; +++ } +++ +++ aux->map_off = off; +++ addr += off; +++ } +++ +++ insn[0].imm = (u32)addr; +++ insn[1].imm = addr >> 32; ++ ++ /* check whether we recorded this map already */ ++- for (j = 0; j < env->used_map_cnt; j++) +++ for (j = 0; j < env->used_map_cnt; j++) { ++ if (env->used_maps[j] == map) { +++ aux->map_index = j; ++ fdput(f); ++ goto next_insn; ++ } +++ } ++ ++ if (env->used_map_cnt >= MAX_USED_MAPS) { ++ fdput(f); ++@@ -2048,19 +8254,31 @@ static int replace_map_fd_with_map_ptr(s ++ /* hold the map. If the program is rejected by verifier, ++ * the map will be released by release_maps() or it ++ * will be used by the valid program until it's unloaded ++- * and all maps are released in free_bpf_prog_info() +++ * and all maps are released in free_used_maps() ++ */ ++ map = bpf_map_inc(map, false); ++ if (IS_ERR(map)) { ++ fdput(f); ++ return PTR_ERR(map); ++ } +++ +++ aux->map_index = env->used_map_cnt; ++ env->used_maps[env->used_map_cnt++] = map; ++ +++ if (bpf_map_is_cgroup_storage(map)) +++ return -EINVAL; +++ ++ fdput(f); ++ next_insn: ++ insn++; ++ i++; +++ continue; +++ } +++ +++ /* Basic sanity check before we invest more work here. 
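/*
 * Editorial sketch, not part of the backported diff: the two-instruction
 * BPF_LD_IMM64 form that replace_map_fd_with_map_ptr() above rewrites.  A
 * loader places the map file descriptor in insn[0].imm with
 * src_reg == BPF_PSEUDO_MAP_FD and keeps insn[1].imm at zero; the verifier
 * then substitutes the kernel map pointer, split across the two imm fields.
 * emit_ld_map_fd() is a hypothetical helper name.
 */
#include <linux/bpf.h>

static void emit_ld_map_fd(struct bpf_insn *insn, int map_fd)
{
        insn[0] = (struct bpf_insn) {
                .code    = BPF_LD | BPF_DW | BPF_IMM,
                .dst_reg = BPF_REG_1,
                .src_reg = BPF_PSEUDO_MAP_FD,
                .imm     = map_fd,
        };
        /* second half of the 64-bit immediate; non-zero is rejected above */
        insn[1] = (struct bpf_insn) { .imm = 0 };
}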
*/ +++ if (!bpf_opcode_in_insntable(insn->code)) { +++ verbose(env, "unknown opcode %02x\n", insn->code); +++ return -EINVAL; ++ } ++ } ++ ++@@ -2072,7 +8290,7 @@ next_insn: ++ } ++ ++ /* drop refcnt of maps used by the rejected program */ ++-static void release_maps(struct verifier_env *env) +++static void release_maps(struct bpf_verifier_env *env) ++ { ++ int i; ++ ++@@ -2081,7 +8299,7 @@ static void release_maps(struct verifier ++ } ++ ++ /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */ ++-static void convert_pseudo_ld_imm64(struct verifier_env *env) +++static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) ++ { ++ struct bpf_insn *insn = env->prog->insnsi; ++ int insn_cnt = env->prog->len; ++@@ -2092,201 +8310,1266 @@ static void convert_pseudo_ld_imm64(stru ++ insn->src_reg = 0; ++ } ++ ++-static void adjust_branches(struct bpf_prog *prog, int pos, int delta) +++/* single env->prog->insni[off] instruction was replaced with the range +++ * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying +++ * [0, off) and [off, end) to new locations, so the patched range stays zero +++ */ +++static int adjust_insn_aux_data(struct bpf_verifier_env *env, +++ struct bpf_prog *new_prog, u32 off, u32 cnt) ++ { ++- struct bpf_insn *insn = prog->insnsi; ++- int insn_cnt = prog->len; +++ struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; +++ struct bpf_insn *insn = new_prog->insnsi; +++ bool old_seen = old_data[off].seen; +++ u32 prog_len; ++ int i; ++ ++- for (i = 0; i < insn_cnt; i++, insn++) { ++- if (BPF_CLASS(insn->code) != BPF_JMP || ++- BPF_OP(insn->code) == BPF_CALL || ++- BPF_OP(insn->code) == BPF_EXIT) +++ /* aux info at OFF always needs adjustment, no matter fast path +++ * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the +++ * original insn at old prog. +++ */ +++ old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1); +++ +++ if (cnt == 1) +++ return 0; +++ prog_len = new_prog->len; +++ new_data = vzalloc(array_size(prog_len, +++ sizeof(struct bpf_insn_aux_data))); +++ if (!new_data) +++ return -ENOMEM; +++ memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); +++ memcpy(new_data + off + cnt - 1, old_data + off, +++ sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); +++ for (i = off; i < off + cnt - 1; i++) { +++ /* Expand insni[off]'s seen count to the patched range. */ +++ new_data[i].seen = old_seen; +++ new_data[i].zext_dst = insn_has_def32(env, insn + i); +++ } +++ env->insn_aux_data = new_data; +++ vfree(old_data); +++ return 0; +++} +++ +++static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len) +++{ +++ int i; +++ +++ if (len == 1) +++ return; +++ /* NOTE: fake 'exit' subprog should be updated as well. 
*/ +++ for (i = 0; i <= env->subprog_cnt; i++) { +++ if (env->subprog_info[i].start <= off) ++ continue; +++ env->subprog_info[i].start += len - 1; +++ } +++} +++ +++static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, +++ const struct bpf_insn *patch, u32 len) +++{ +++ struct bpf_prog *new_prog; +++ +++ new_prog = bpf_patch_insn_single(env->prog, off, patch, len); +++ if (IS_ERR(new_prog)) { +++ if (PTR_ERR(new_prog) == -ERANGE) +++ verbose(env, +++ "insn %d cannot be patched due to 16-bit range\n", +++ env->insn_aux_data[off].orig_idx); +++ return NULL; +++ } +++ if (adjust_insn_aux_data(env, new_prog, off, len)) +++ return NULL; +++ adjust_subprog_starts(env, off, len); +++ return new_prog; +++} ++ ++- /* adjust offset of jmps if necessary */ ++- if (i < pos && i + insn->off + 1 > pos) ++- insn->off += delta; ++- else if (i > pos + delta && i + insn->off + 1 <= pos + delta) ++- insn->off -= delta; +++static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env, +++ u32 off, u32 cnt) +++{ +++ int i, j; +++ +++ /* find first prog starting at or after off (first to remove) */ +++ for (i = 0; i < env->subprog_cnt; i++) +++ if (env->subprog_info[i].start >= off) +++ break; +++ /* find first prog starting at or after off + cnt (first to stay) */ +++ for (j = i; j < env->subprog_cnt; j++) +++ if (env->subprog_info[j].start >= off + cnt) +++ break; +++ /* if j doesn't start exactly at off + cnt, we are just removing +++ * the front of previous prog +++ */ +++ if (env->subprog_info[j].start != off + cnt) +++ j--; +++ +++ if (j > i) { +++ struct bpf_prog_aux *aux = env->prog->aux; +++ int move; +++ +++ /* move fake 'exit' subprog as well */ +++ move = env->subprog_cnt + 1 - j; +++ +++ memmove(env->subprog_info + i, +++ env->subprog_info + j, +++ sizeof(*env->subprog_info) * move); +++ env->subprog_cnt -= j - i; +++ +++ /* remove func_info */ +++ if (aux->func_info) { +++ move = aux->func_info_cnt - j; +++ +++ memmove(aux->func_info + i, +++ aux->func_info + j, +++ sizeof(*aux->func_info) * move); +++ aux->func_info_cnt -= j - i; +++ /* func_info->insn_off is set after all code rewrites, +++ * in adjust_btf_func() - no need to adjust +++ */ +++ } +++ } else { +++ /* convert i from "first prog to remove" to "first to adjust" */ +++ if (env->subprog_info[i].start == off) +++ i++; ++ } +++ +++ /* update fake 'exit' subprog as well */ +++ for (; i <= env->subprog_cnt; i++) +++ env->subprog_info[i].start -= cnt; +++ +++ return 0; +++} +++ +++static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off, +++ u32 cnt) +++{ +++ struct bpf_prog *prog = env->prog; +++ u32 i, l_off, l_cnt, nr_linfo; +++ struct bpf_line_info *linfo; +++ +++ nr_linfo = prog->aux->nr_linfo; +++ if (!nr_linfo) +++ return 0; +++ +++ linfo = prog->aux->linfo; +++ +++ /* find first line info to remove, count lines to be removed */ +++ for (i = 0; i < nr_linfo; i++) +++ if (linfo[i].insn_off >= off) +++ break; +++ +++ l_off = i; +++ l_cnt = 0; +++ for (; i < nr_linfo; i++) +++ if (linfo[i].insn_off < off + cnt) +++ l_cnt++; +++ else +++ break; +++ +++ /* First live insn doesn't match first live linfo, it needs to "inherit" +++ * last removed linfo. prog is already modified, so prog->len == off +++ * means no live instructions after (tail of the program was removed). 
+++ */ +++ if (prog->len != off && l_cnt && +++ (i == nr_linfo || linfo[i].insn_off != off + cnt)) { +++ l_cnt--; +++ linfo[--i].insn_off = off + cnt; +++ } +++ +++ /* remove the line info which refer to the removed instructions */ +++ if (l_cnt) { +++ memmove(linfo + l_off, linfo + i, +++ sizeof(*linfo) * (nr_linfo - i)); +++ +++ prog->aux->nr_linfo -= l_cnt; +++ nr_linfo = prog->aux->nr_linfo; +++ } +++ +++ /* pull all linfo[i].insn_off >= off + cnt in by cnt */ +++ for (i = l_off; i < nr_linfo; i++) +++ linfo[i].insn_off -= cnt; +++ +++ /* fix up all subprogs (incl. 'exit') which start >= off */ +++ for (i = 0; i <= env->subprog_cnt; i++) +++ if (env->subprog_info[i].linfo_idx > l_off) { +++ /* program may have started in the removed region but +++ * may not be fully removed +++ */ +++ if (env->subprog_info[i].linfo_idx >= l_off + l_cnt) +++ env->subprog_info[i].linfo_idx -= l_cnt; +++ else +++ env->subprog_info[i].linfo_idx = l_off; +++ } +++ +++ return 0; +++} +++ +++static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) +++{ +++ struct bpf_insn_aux_data *aux_data = env->insn_aux_data; +++ unsigned int orig_prog_len = env->prog->len; +++ int err; +++ +++ err = bpf_remove_insns(env->prog, off, cnt); +++ if (err) +++ return err; +++ +++ err = adjust_subprog_starts_after_remove(env, off, cnt); +++ if (err) +++ return err; +++ +++ err = bpf_adj_linfo_after_remove(env, off, cnt); +++ if (err) +++ return err; +++ +++ memmove(aux_data + off, aux_data + off + cnt, +++ sizeof(*aux_data) * (orig_prog_len - off - cnt)); +++ +++ return 0; ++ } ++ ++-/* convert load instructions that access fields of 'struct __sk_buff' ++- * into sequence of instructions that access fields of 'struct sk_buff' +++/* The verifier does more data flow analysis than llvm and will not +++ * explore branches that are dead at run time. Malicious programs can +++ * have dead code too. Therefore replace all dead at-run-time code +++ * with 'ja -1'. +++ * +++ * Just nops are not optimal, e.g. if they would sit at the end of the +++ * program and through another bug we would manage to jump there, then +++ * we'd execute beyond program memory otherwise. Returning exception +++ * code also wouldn't work since we can have subprogs where the dead +++ * code could be located. 
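/*
 * Editorial sketch, not part of the backported diff: a toy program with a
 * branch the verifier can prove is always taken, so the two fall-through
 * insns are never marked seen.  For unprivileged loads sanitize_dead_code()
 * below turns them into 'ja -1' traps; for privileged loads
 * opt_hard_wire_dead_code_branches() and opt_remove_dead_code() rewrite the
 * branch and drop the dead insns instead.  Kernel-side sketch using the insn
 * macros from the include/linux/filter.h hunk later in this patch;
 * build_dead_tail_example() is a hypothetical name.
 */
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/string.h>

static unsigned int build_dead_tail_example(struct bpf_insn *out /* >= 5 slots */)
{
        const struct bpf_insn prog[] = {
                BPF_MOV64_IMM(BPF_REG_0, 1),
                BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 2),  /* always taken: r0 == 1 */
                BPF_MOV64_IMM(BPF_REG_0, 2),            /* never visited */
                BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),   /* never visited */
                BPF_EXIT_INSN(),
        };

        memcpy(out, prog, sizeof(prog));
        return ARRAY_SIZE(prog);
}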
++ */ ++-static int convert_ctx_accesses(struct verifier_env *env) +++static void sanitize_dead_code(struct bpf_verifier_env *env) ++ { +++ struct bpf_insn_aux_data *aux_data = env->insn_aux_data; +++ struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1); ++ struct bpf_insn *insn = env->prog->insnsi; +++ const int insn_cnt = env->prog->len; +++ int i; +++ +++ for (i = 0; i < insn_cnt; i++) { +++ if (aux_data[i].seen) +++ continue; +++ memcpy(insn + i, &trap, sizeof(trap)); +++ } +++} +++ +++static bool insn_is_cond_jump(u8 code) +++{ +++ u8 op; +++ +++ if (BPF_CLASS(code) == BPF_JMP32) +++ return true; +++ +++ if (BPF_CLASS(code) != BPF_JMP) +++ return false; +++ +++ op = BPF_OP(code); +++ return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL; +++} +++ +++static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env) +++{ +++ struct bpf_insn_aux_data *aux_data = env->insn_aux_data; +++ struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0); +++ struct bpf_insn *insn = env->prog->insnsi; +++ const int insn_cnt = env->prog->len; +++ int i; +++ +++ for (i = 0; i < insn_cnt; i++, insn++) { +++ if (!insn_is_cond_jump(insn->code)) +++ continue; +++ +++ if (!aux_data[i + 1].seen) +++ ja.off = insn->off; +++ else if (!aux_data[i + 1 + insn->off].seen) +++ ja.off = 0; +++ else +++ continue; +++ +++ memcpy(insn, &ja, sizeof(ja)); +++ } +++} +++ +++static int opt_remove_dead_code(struct bpf_verifier_env *env) +++{ +++ struct bpf_insn_aux_data *aux_data = env->insn_aux_data; ++ int insn_cnt = env->prog->len; ++- struct bpf_insn insn_buf[16]; +++ int i, err; +++ +++ for (i = 0; i < insn_cnt; i++) { +++ int j; +++ +++ j = 0; +++ while (i + j < insn_cnt && !aux_data[i + j].seen) +++ j++; +++ if (!j) +++ continue; +++ +++ err = verifier_remove_insns(env, i, j); +++ if (err) +++ return err; +++ insn_cnt = env->prog->len; +++ } +++ +++ return 0; +++} +++ +++static int opt_remove_nops(struct bpf_verifier_env *env) +++{ +++ const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0); +++ struct bpf_insn *insn = env->prog->insnsi; +++ int insn_cnt = env->prog->len; +++ int i, err; +++ +++ for (i = 0; i < insn_cnt; i++) { +++ if (memcmp(&insn[i], &ja, sizeof(ja))) +++ continue; +++ +++ err = verifier_remove_insns(env, i, 1); +++ if (err) +++ return err; +++ insn_cnt--; +++ i--; +++ } +++ +++ return 0; +++} +++ +++static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env, +++ const union bpf_attr *attr) +++{ +++ struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4]; +++ struct bpf_insn_aux_data *aux = env->insn_aux_data; +++ int i, patch_len, delta = 0, len = env->prog->len; +++ struct bpf_insn *insns = env->prog->insnsi; +++ struct bpf_prog *new_prog; +++ bool rnd_hi32; +++ +++ rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32; +++ zext_patch[1] = BPF_ZEXT_REG(0); +++ rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0); +++ rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32); +++ rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX); +++ for (i = 0; i < len; i++) { +++ int adj_idx = i + delta; +++ struct bpf_insn insn; +++ +++ insn = insns[adj_idx]; +++ if (!aux[adj_idx].zext_dst) { +++ u8 code, class; +++ u32 imm_rnd; +++ +++ if (!rnd_hi32) +++ continue; +++ +++ code = insn.code; +++ class = BPF_CLASS(code); +++ if (insn_no_def(&insn)) +++ continue; +++ +++ /* NOTE: arg "reg" (the fourth one) is only used for +++ * BPF_STX which has been ruled out in above +++ * check, it is safe to pass NULL here. 
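/*
 * Editorial sketch, not part of the backported diff: the shape of the patch
 * that the sub-register pass defined here (opt_subreg_zext_lo32_rnd_hi32())
 * inserts after a 32-bit definition when bpf_jit_needs_zext() is true.
 * BPF_ZEXT_REG comes from the include/linux/filter.h hunk later in this
 * patch: a mov32 of the register onto itself with imm == 1, which
 * insn_is_zext() recognises.  emit_alu32_with_zext() is a hypothetical name.
 */
#include <linux/filter.h>

static void emit_alu32_with_zext(struct bpf_insn out[2])
{
        out[0] = BPF_ALU32_IMM(BPF_ADD, BPF_REG_2, 1);  /* 32-bit def of r2 */
        out[1] = BPF_ZEXT_REG(BPF_REG_2);               /* explicit upper-32 clear */
}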
+++ */ +++ if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) { +++ if (class == BPF_LD && +++ BPF_MODE(code) == BPF_IMM) +++ i++; +++ continue; +++ } +++ +++ /* ctx load could be transformed into wider load. */ +++ if (class == BPF_LDX && +++ aux[adj_idx].ptr_type == PTR_TO_CTX) +++ continue; +++ +++ imm_rnd = get_random_int(); +++ rnd_hi32_patch[0] = insn; +++ rnd_hi32_patch[1].imm = imm_rnd; +++ rnd_hi32_patch[3].dst_reg = insn.dst_reg; +++ patch = rnd_hi32_patch; +++ patch_len = 4; +++ goto apply_patch_buffer; +++ } +++ +++ if (!bpf_jit_needs_zext()) +++ continue; +++ +++ zext_patch[0] = insn; +++ zext_patch[1].dst_reg = insn.dst_reg; +++ zext_patch[1].src_reg = insn.dst_reg; +++ patch = zext_patch; +++ patch_len = 2; +++apply_patch_buffer: +++ new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len); +++ if (!new_prog) +++ return -ENOMEM; +++ env->prog = new_prog; +++ insns = new_prog->insnsi; +++ aux = env->insn_aux_data; +++ delta += patch_len - 1; +++ } +++ +++ return 0; +++} +++ +++/* convert load instructions that access fields of a context type into a +++ * sequence of instructions that access fields of the underlying structure: +++ * struct __sk_buff -> struct sk_buff +++ * struct bpf_sock_ops -> struct sock +++ */ +++static int convert_ctx_accesses(struct bpf_verifier_env *env) +++{ +++ const struct bpf_verifier_ops *ops = env->ops; +++ int i, cnt, size, ctx_field_size, delta = 0; +++ const int insn_cnt = env->prog->len; +++ struct bpf_insn insn_buf[16], *insn; +++ u32 target_size, size_default, off; ++ struct bpf_prog *new_prog; ++- u32 cnt; ++- int i; ++ enum bpf_access_type type; +++ bool is_narrower_load; +++ +++ if (ops->gen_prologue || env->seen_direct_write) { +++ if (!ops->gen_prologue) { +++ verbose(env, "bpf verifier is misconfigured\n"); +++ return -EINVAL; +++ } +++ cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, +++ env->prog); +++ if (cnt >= ARRAY_SIZE(insn_buf)) { +++ verbose(env, "bpf verifier is misconfigured\n"); +++ return -EINVAL; +++ } else if (cnt) { +++ new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt); +++ if (!new_prog) +++ return -ENOMEM; +++ +++ env->prog = new_prog; +++ delta += cnt - 1; +++ } +++ } ++ ++- if (!env->prog->aux->ops->convert_ctx_access) +++ if (bpf_prog_is_dev_bound(env->prog->aux)) ++ return 0; ++ +++ insn = env->prog->insnsi + delta; +++ ++ for (i = 0; i < insn_cnt; i++, insn++) { ++- if (insn->code == (BPF_LDX | BPF_MEM | BPF_W)) +++ bpf_convert_ctx_access_t convert_ctx_access; +++ +++ if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || +++ insn->code == (BPF_LDX | BPF_MEM | BPF_H) || +++ insn->code == (BPF_LDX | BPF_MEM | BPF_W) || +++ insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) ++ type = BPF_READ; ++- else if (insn->code == (BPF_STX | BPF_MEM | BPF_W)) +++ else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || +++ insn->code == (BPF_STX | BPF_MEM | BPF_H) || +++ insn->code == (BPF_STX | BPF_MEM | BPF_W) || +++ insn->code == (BPF_STX | BPF_MEM | BPF_DW)) ++ type = BPF_WRITE; ++ else ++ continue; ++ ++- if (insn->imm != PTR_TO_CTX) { ++- /* clear internal mark */ ++- insn->imm = 0; +++ if (type == BPF_WRITE && +++ env->insn_aux_data[i + delta].sanitize_stack_off) { +++ struct bpf_insn patch[] = { +++ /* Sanitize suspicious stack slot with zero. 
+++ * There are no memory dependencies for this store, +++ * since it's only using frame pointer and immediate +++ * constant of zero +++ */ +++ BPF_ST_MEM(BPF_DW, BPF_REG_FP, +++ env->insn_aux_data[i + delta].sanitize_stack_off, +++ 0), +++ /* the original STX instruction will immediately +++ * overwrite the same stack slot with appropriate value +++ */ +++ *insn, +++ }; +++ +++ cnt = ARRAY_SIZE(patch); +++ new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt); +++ if (!new_prog) +++ return -ENOMEM; +++ +++ delta += cnt - 1; +++ env->prog = new_prog; +++ insn = new_prog->insnsi + i + delta; +++ continue; +++ } +++ +++ switch (env->insn_aux_data[i + delta].ptr_type) { +++ case PTR_TO_CTX: +++ if (!ops->convert_ctx_access) +++ continue; +++ convert_ctx_access = ops->convert_ctx_access; +++ break; +++ default: ++ continue; ++ } ++ ++- cnt = env->prog->aux->ops-> ++- convert_ctx_access(type, insn->dst_reg, insn->src_reg, ++- insn->off, insn_buf, env->prog); ++- if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { ++- verbose("bpf verifier is misconfigured\n"); +++ ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size; +++ size = BPF_LDST_BYTES(insn); +++ +++ /* If the read access is a narrower load of the field, +++ * convert to a 4/8-byte load, to minimum program type specific +++ * convert_ctx_access changes. If conversion is successful, +++ * we will apply proper mask to the result. +++ */ +++ is_narrower_load = size < ctx_field_size; +++ size_default = bpf_ctx_off_adjust_machine(ctx_field_size); +++ off = insn->off; +++ if (is_narrower_load) { +++ u8 size_code; +++ +++ if (type == BPF_WRITE) { +++ verbose(env, "bpf verifier narrow ctx access misconfigured\n"); +++ return -EINVAL; +++ } +++ +++ size_code = BPF_H; +++ if (ctx_field_size == 4) +++ size_code = BPF_W; +++ else if (ctx_field_size == 8) +++ size_code = BPF_DW; +++ +++ insn->off = off & ~(size_default - 1); +++ insn->code = BPF_LDX | BPF_MEM | size_code; +++ } +++ +++ target_size = 0; +++ cnt = convert_ctx_access(type, insn, insn_buf, env->prog, +++ &target_size); +++ if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) || +++ (ctx_field_size && !target_size)) { +++ verbose(env, "bpf verifier is misconfigured\n"); ++ return -EINVAL; ++ } ++ ++- if (cnt == 1) { ++- memcpy(insn, insn_buf, sizeof(*insn)); ++- continue; +++ if (is_narrower_load && size < target_size) { +++ u8 shift = bpf_ctx_narrow_access_offset( +++ off, size, size_default) * 8; +++ if (ctx_field_size <= 4) { +++ if (shift) +++ insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH, +++ insn->dst_reg, +++ shift); +++ insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg, +++ (1 << size * 8) - 1); +++ } else { +++ if (shift) +++ insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH, +++ insn->dst_reg, +++ shift); +++ insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg, +++ (1ULL << size * 8) - 1); +++ } ++ } ++ ++- /* several new insns need to be inserted. 
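/*
 * Editorial sketch, not part of the backported diff: the arithmetic behind
 * the narrow-load rewrite above, written out for a little-endian machine.  A
 * one-byte read of a wider context field is widened to an aligned 4-byte
 * load, then shifted and masked back down, matching the BPF_RSH/BPF_AND pair
 * appended to insn_buf.  bpf_ctx_narrow_access_offset() hides the endianness
 * detail; narrow_ctx_read_u8() is a hypothetical name.
 */
#include <stdint.h>

static uint8_t narrow_ctx_read_u8(const void *ctx, unsigned int off)
{
        const uint32_t *words = ctx;
        uint32_t full = words[off / 4];         /* widened, aligned 4-byte load */
        unsigned int shift = (off % 4) * 8;     /* little-endian byte offset */

        return (full >> shift) & 0xff;          /* mask back to the narrow size */
}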
Make room for them */ ++- insn_cnt += cnt - 1; ++- new_prog = bpf_prog_realloc(env->prog, ++- bpf_prog_size(insn_cnt), ++- GFP_USER); +++ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); ++ if (!new_prog) ++ return -ENOMEM; ++ ++- new_prog->len = insn_cnt; +++ delta += cnt - 1; ++ ++- memmove(new_prog->insnsi + i + cnt, new_prog->insns + i + 1, ++- sizeof(*insn) * (insn_cnt - i - cnt)); +++ /* keep walking new program and skip insns we just inserted */ +++ env->prog = new_prog; +++ insn = new_prog->insnsi + i + delta; +++ } ++ ++- /* copy substitute insns in place of load instruction */ ++- memcpy(new_prog->insnsi + i, insn_buf, sizeof(*insn) * cnt); +++ return 0; +++} ++ ++- /* adjust branches in the whole program */ ++- adjust_branches(new_prog, i, cnt - 1); +++static int jit_subprogs(struct bpf_verifier_env *env) +++{ +++ struct bpf_prog *prog = env->prog, **func, *tmp; +++ int i, j, subprog_start, subprog_end = 0, len, subprog; +++ struct bpf_insn *insn; +++ void *old_bpf_func; +++ int err; ++ ++- /* keep walking new program and skip insns we just inserted */ ++- env->prog = new_prog; ++- insn = new_prog->insnsi + i + cnt - 1; ++- i += cnt - 1; +++ if (env->subprog_cnt <= 1) +++ return 0; +++ +++ for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { +++ if (insn->code != (BPF_JMP | BPF_CALL) || +++ insn->src_reg != BPF_PSEUDO_CALL) +++ continue; +++ /* Upon error here we cannot fall back to interpreter but +++ * need a hard reject of the program. Thus -EFAULT is +++ * propagated in any case. +++ */ +++ subprog = find_subprog(env, i + insn->imm + 1); +++ if (subprog < 0) { +++ WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", +++ i + insn->imm + 1); +++ return -EFAULT; +++ } +++ /* temporarily remember subprog id inside insn instead of +++ * aux_data, since next loop will split up all insns into funcs +++ */ +++ insn->off = subprog; +++ /* remember original imm in case JIT fails and fallback +++ * to interpreter will be needed +++ */ +++ env->insn_aux_data[i].call_imm = insn->imm; +++ /* point imm to __bpf_call_base+1 from JITs point of view */ +++ insn->imm = 1; +++ } +++ +++ err = bpf_prog_alloc_jited_linfo(prog); +++ if (err) +++ goto out_undo_insn; +++ +++ err = -ENOMEM; +++ func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL); +++ if (!func) +++ goto out_undo_insn; +++ +++ for (i = 0; i < env->subprog_cnt; i++) { +++ subprog_start = subprog_end; +++ subprog_end = env->subprog_info[i + 1].start; +++ +++ len = subprog_end - subprog_start; +++ /* BPF_PROG_RUN doesn't call subprogs directly, +++ * hence main prog stats include the runtime of subprogs. +++ * subprogs don't have IDs and not reachable via prog_get_next_id +++ * func[i]->aux->stats will never be accessed and stays NULL +++ */ +++ func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER); +++ if (!func[i]) +++ goto out_free; +++ memcpy(func[i]->insnsi, &prog->insnsi[subprog_start], +++ len * sizeof(struct bpf_insn)); +++ func[i]->type = prog->type; +++ func[i]->len = len; +++ if (bpf_prog_calc_tag(func[i])) +++ goto out_free; +++ func[i]->is_func = 1; +++ func[i]->aux->func_idx = i; +++ /* the btf and func_info will be freed only at prog->aux */ +++ func[i]->aux->btf = prog->aux->btf; +++ func[i]->aux->func_info = prog->aux->func_info; +++ +++ /* Use bpf_prog_F_tag to indicate functions in stack traces. 
+++ * Long term would need debug info to populate names +++ */ +++ func[i]->aux->name[0] = 'F'; +++ func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; +++ func[i]->jit_requested = 1; +++ func[i]->aux->linfo = prog->aux->linfo; +++ func[i]->aux->nr_linfo = prog->aux->nr_linfo; +++ func[i]->aux->jited_linfo = prog->aux->jited_linfo; +++ func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx; +++ func[i] = bpf_int_jit_compile(func[i]); +++ if (!func[i]->jited) { +++ err = -ENOTSUPP; +++ goto out_free; +++ } +++ cond_resched(); +++ } +++ /* at this point all bpf functions were successfully JITed +++ * now populate all bpf_calls with correct addresses and +++ * run last pass of JIT +++ */ +++ for (i = 0; i < env->subprog_cnt; i++) { +++ insn = func[i]->insnsi; +++ for (j = 0; j < func[i]->len; j++, insn++) { +++ if (insn->code != (BPF_JMP | BPF_CALL) || +++ insn->src_reg != BPF_PSEUDO_CALL) +++ continue; +++ subprog = insn->off; +++ insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) - +++ __bpf_call_base; +++ } +++ +++ /* we use the aux data to keep a list of the start addresses +++ * of the JITed images for each function in the program +++ * +++ * for some architectures, such as powerpc64, the imm field +++ * might not be large enough to hold the offset of the start +++ * address of the callee's JITed image from __bpf_call_base +++ * +++ * in such cases, we can lookup the start address of a callee +++ * by using its subprog id, available from the off field of +++ * the call instruction, as an index for this list +++ */ +++ func[i]->aux->func = func; +++ func[i]->aux->func_cnt = env->subprog_cnt; +++ } +++ for (i = 0; i < env->subprog_cnt; i++) { +++ old_bpf_func = func[i]->bpf_func; +++ tmp = bpf_int_jit_compile(func[i]); +++ if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) { +++ verbose(env, "JIT doesn't support bpf-to-bpf calls\n"); +++ err = -ENOTSUPP; +++ goto out_free; +++ } +++ cond_resched(); +++ } +++ +++ /* finally lock prog and jit images for all functions and +++ * populate kallsysm +++ */ +++ for (i = 0; i < env->subprog_cnt; i++) { +++ bpf_prog_lock_ro(func[i]); +++ bpf_prog_kallsyms_add(func[i]); +++ } +++ +++ /* Last step: make now unused interpreter insns from main +++ * prog consistent for later dump requests, so they can +++ * later look the same as if they were interpreted only. 
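/*
 * Editorial sketch, not part of the backported diff: what a bpf-to-bpf call
 * looks like before jit_subprogs() above rewrites its imm to a JITed
 * address.  BPF_CALL_REL comes from the include/linux/filter.h hunk later in
 * this patch; imm is instruction-relative, so the callee begins at
 * (call index + imm + 1), which is exactly what find_subprog() is asked for.
 * make_bpf2bpf_call() is a hypothetical name.
 */
#include <linux/filter.h>

/* build a call to a subprogram that starts (delta + 1) insns after the call */
static struct bpf_insn make_bpf2bpf_call(int delta)
{
        return BPF_CALL_REL(delta);
}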
+++ */ +++ for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { +++ if (insn->code != (BPF_JMP | BPF_CALL) || +++ insn->src_reg != BPF_PSEUDO_CALL) +++ continue; +++ insn->off = env->insn_aux_data[i].call_imm; +++ subprog = find_subprog(env, i + insn->off + 1); +++ insn->imm = subprog; ++ } ++ +++ prog->jited = 1; +++ prog->bpf_func = func[0]->bpf_func; +++ prog->aux->func = func; +++ prog->aux->func_cnt = env->subprog_cnt; +++ bpf_prog_free_unused_jited_linfo(prog); ++ return 0; +++out_free: +++ for (i = 0; i < env->subprog_cnt; i++) +++ if (func[i]) +++ bpf_jit_free(func[i]); +++ kfree(func); +++out_undo_insn: +++ /* cleanup main prog to be interpreted */ +++ prog->jit_requested = 0; +++ for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { +++ if (insn->code != (BPF_JMP | BPF_CALL) || +++ insn->src_reg != BPF_PSEUDO_CALL) +++ continue; +++ insn->off = 0; +++ insn->imm = env->insn_aux_data[i].call_imm; +++ } +++ bpf_prog_free_jited_linfo(prog); +++ return err; ++ } ++ ++-static void free_states(struct verifier_env *env) +++static int fixup_call_args(struct bpf_verifier_env *env) ++ { ++- struct verifier_state_list *sl, *sln; +++#ifndef CONFIG_BPF_JIT_ALWAYS_ON +++ struct bpf_prog *prog = env->prog; +++ struct bpf_insn *insn = prog->insnsi; +++ int i, depth; +++#endif +++ int err = 0; +++ +++ if (env->prog->jit_requested && +++ !bpf_prog_is_dev_bound(env->prog->aux)) { +++ err = jit_subprogs(env); +++ if (err == 0) +++ return 0; +++ if (err == -EFAULT) +++ return err; +++ } +++#ifndef CONFIG_BPF_JIT_ALWAYS_ON +++ for (i = 0; i < prog->len; i++, insn++) { +++ if (insn->code != (BPF_JMP | BPF_CALL) || +++ insn->src_reg != BPF_PSEUDO_CALL) +++ continue; +++ depth = get_callee_stack_depth(env, insn, i); +++ if (depth < 0) +++ return depth; +++ bpf_patch_call_args(insn, depth); +++ } +++ err = 0; +++#endif +++ return err; +++} +++ +++/* fixup insn->imm field of bpf_call instructions +++ * and inline eligible helpers as explicit sequence of BPF instructions +++ * +++ * this function is called after eBPF program passed verification +++ */ +++static int fixup_bpf_calls(struct bpf_verifier_env *env) +++{ +++ struct bpf_prog *prog = env->prog; +++ struct bpf_insn *insn = prog->insnsi; +++ const struct bpf_func_proto *fn; +++ const int insn_cnt = prog->len; +++ const struct bpf_map_ops *ops; +++ struct bpf_insn_aux_data *aux; +++ struct bpf_insn insn_buf[16]; +++ struct bpf_prog *new_prog; +++ struct bpf_map *map_ptr; +++ int i, cnt, delta = 0; +++ +++ for (i = 0; i < insn_cnt; i++, insn++) { +++ if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || +++ insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || +++ insn->code == (BPF_ALU | BPF_MOD | BPF_X) || +++ insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { +++ bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; +++ bool isdiv = BPF_OP(insn->code) == BPF_DIV; +++ struct bpf_insn *patchlet; +++ struct bpf_insn chk_and_div[] = { +++ /* [R,W]x div 0 -> 0 */ +++ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | +++ BPF_JNE | BPF_K, insn->src_reg, +++ 0, 2, 0), +++ BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg), +++ BPF_JMP_IMM(BPF_JA, 0, 0, 1), +++ *insn, +++ }; +++ struct bpf_insn chk_and_mod[] = { +++ /* [R,W]x mod 0 -> [R,W]x */ +++ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | +++ BPF_JEQ | BPF_K, insn->src_reg, +++ 0, 1 + (is64 ? 0 : 1), 0), +++ *insn, +++ BPF_JMP_IMM(BPF_JA, 0, 0, 1), +++ BPF_MOV32_REG(insn->dst_reg, insn->dst_reg), +++ }; +++ +++ patchlet = isdiv ? chk_and_div : chk_and_mod; +++ cnt = isdiv ? 
ARRAY_SIZE(chk_and_div) : +++ ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0); +++ +++ new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); +++ if (!new_prog) +++ return -ENOMEM; +++ +++ delta += cnt - 1; +++ env->prog = prog = new_prog; +++ insn = new_prog->insnsi + i + delta; +++ continue; +++ } +++ +++ if (BPF_CLASS(insn->code) == BPF_LD && +++ (BPF_MODE(insn->code) == BPF_ABS || +++ BPF_MODE(insn->code) == BPF_IND)) { +++ cnt = env->ops->gen_ld_abs(insn, insn_buf); +++ if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { +++ verbose(env, "bpf verifier is misconfigured\n"); +++ return -EINVAL; +++ } +++ +++ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); +++ if (!new_prog) +++ return -ENOMEM; +++ +++ delta += cnt - 1; +++ env->prog = prog = new_prog; +++ insn = new_prog->insnsi + i + delta; +++ continue; +++ } +++ +++ if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) || +++ insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) { +++ const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X; +++ const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X; +++ struct bpf_insn insn_buf[16]; +++ struct bpf_insn *patch = &insn_buf[0]; +++ bool issrc, isneg, isimm; +++ u32 off_reg; +++ +++ aux = &env->insn_aux_data[i + delta]; +++ if (!aux->alu_state || +++ aux->alu_state == BPF_ALU_NON_POINTER) +++ continue; +++ +++ isneg = aux->alu_state & BPF_ALU_NEG_VALUE; +++ issrc = (aux->alu_state & BPF_ALU_SANITIZE) == +++ BPF_ALU_SANITIZE_SRC; +++ isimm = aux->alu_state & BPF_ALU_IMMEDIATE; +++ +++ off_reg = issrc ? insn->src_reg : insn->dst_reg; +++ if (isimm) { +++ *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit); +++ } else { +++ if (isneg) +++ *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); +++ *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit); +++ *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg); +++ *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg); +++ *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0); +++ *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63); +++ *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg); +++ } +++ if (!issrc) +++ *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg); +++ insn->src_reg = BPF_REG_AX; +++ if (isneg) +++ insn->code = insn->code == code_add ? +++ code_sub : code_add; +++ *patch++ = *insn; +++ if (issrc && isneg && !isimm) +++ *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); +++ cnt = patch - insn_buf; +++ +++ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); +++ if (!new_prog) +++ return -ENOMEM; +++ +++ delta += cnt - 1; +++ env->prog = prog = new_prog; +++ insn = new_prog->insnsi + i + delta; +++ continue; +++ } +++ +++ if (insn->code != (BPF_JMP | BPF_CALL)) +++ continue; +++ if (insn->src_reg == BPF_PSEUDO_CALL) +++ continue; +++ +++ if (insn->imm == BPF_FUNC_get_route_realm) +++ prog->dst_needed = 1; +++ if (insn->imm == BPF_FUNC_get_prandom_u32) +++ bpf_user_rnd_init_once(); +++ if (insn->imm == BPF_FUNC_override_return) +++ prog->kprobe_override = 1; +++ if (insn->imm == BPF_FUNC_tail_call) { +++ /* If we tail call into other programs, we +++ * cannot make any assumptions since they can +++ * be replaced dynamically during runtime in +++ * the program array. 
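/*
 * Editorial sketch, not part of the backported diff: the net effect of the
 * BPF_REG_AX masking sequence emitted above for sanitized pointer arithmetic
 * (the isneg/issrc/isimm variants are ignored).  Roughly, the offset
 * survives when it lies within alu_limit and collapses to zero otherwise, so
 * even a speculatively executed out-of-bounds offset cannot form a wild
 * pointer.  masked_alu_offset() is a hypothetical name.
 */
#include <stdint.h>

static uint64_t masked_alu_offset(uint64_t off, uint64_t alu_limit)
{
        uint64_t ax = alu_limit;                /* BPF_MOV32_IMM(BPF_REG_AX, limit) */

        ax -= off;                              /* BPF_SUB  */
        ax |= off;                              /* BPF_OR   */
        ax = -ax;                               /* BPF_NEG  */
        ax = (uint64_t)((int64_t)ax >> 63);     /* BPF_ARSH 63: all-ones or zero */

        return ax & off;                        /* BPF_AND: off if in range, else 0 */
}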
+++ */ +++ prog->cb_access = 1; +++ env->prog->aux->stack_depth = MAX_BPF_STACK; +++ env->prog->aux->max_pkt_offset = MAX_PACKET_OFF; +++ +++ /* mark bpf_tail_call as different opcode to avoid +++ * conditional branch in the interpeter for every normal +++ * call and to prevent accidental JITing by JIT compiler +++ * that doesn't support bpf_tail_call yet +++ */ +++ insn->imm = 0; +++ insn->code = BPF_JMP | BPF_TAIL_CALL; +++ +++ aux = &env->insn_aux_data[i + delta]; +++ if (!bpf_map_ptr_unpriv(aux)) +++ continue; +++ +++ /* instead of changing every JIT dealing with tail_call +++ * emit two extra insns: +++ * if (index >= max_entries) goto out; +++ * index &= array->index_mask; +++ * to avoid out-of-bounds cpu speculation +++ */ +++ if (bpf_map_ptr_poisoned(aux)) { +++ verbose(env, "tail_call abusing map_ptr\n"); +++ return -EINVAL; +++ } +++ +++ map_ptr = BPF_MAP_PTR(aux->map_state); +++ insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, +++ map_ptr->max_entries, 2); +++ insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, +++ container_of(map_ptr, +++ struct bpf_array, +++ map)->index_mask); +++ insn_buf[2] = *insn; +++ cnt = 3; +++ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); +++ if (!new_prog) +++ return -ENOMEM; +++ +++ delta += cnt - 1; +++ env->prog = prog = new_prog; +++ insn = new_prog->insnsi + i + delta; +++ continue; +++ } +++ +++ /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup +++ * and other inlining handlers are currently limited to 64 bit +++ * only. +++ */ +++ if (prog->jit_requested && BITS_PER_LONG == 64 && +++ (insn->imm == BPF_FUNC_map_lookup_elem || +++ insn->imm == BPF_FUNC_map_update_elem || +++ insn->imm == BPF_FUNC_map_delete_elem || +++ insn->imm == BPF_FUNC_map_push_elem || +++ insn->imm == BPF_FUNC_map_pop_elem || +++ insn->imm == BPF_FUNC_map_peek_elem)) { +++ aux = &env->insn_aux_data[i + delta]; +++ if (bpf_map_ptr_poisoned(aux)) +++ goto patch_call_imm; +++ +++ map_ptr = BPF_MAP_PTR(aux->map_state); +++ ops = map_ptr->ops; +++ if (insn->imm == BPF_FUNC_map_lookup_elem && +++ ops->map_gen_lookup) { +++ cnt = ops->map_gen_lookup(map_ptr, insn_buf); +++ if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { +++ verbose(env, "bpf verifier is misconfigured\n"); +++ return -EINVAL; +++ } +++ +++ new_prog = bpf_patch_insn_data(env, i + delta, +++ insn_buf, cnt); +++ if (!new_prog) +++ return -ENOMEM; +++ +++ delta += cnt - 1; +++ env->prog = prog = new_prog; +++ insn = new_prog->insnsi + i + delta; +++ continue; +++ } +++ +++ BUILD_BUG_ON(!__same_type(ops->map_lookup_elem, +++ (void *(*)(struct bpf_map *map, void *key))NULL)); +++ BUILD_BUG_ON(!__same_type(ops->map_delete_elem, +++ (int (*)(struct bpf_map *map, void *key))NULL)); +++ BUILD_BUG_ON(!__same_type(ops->map_update_elem, +++ (int (*)(struct bpf_map *map, void *key, void *value, +++ u64 flags))NULL)); +++ BUILD_BUG_ON(!__same_type(ops->map_push_elem, +++ (int (*)(struct bpf_map *map, void *value, +++ u64 flags))NULL)); +++ BUILD_BUG_ON(!__same_type(ops->map_pop_elem, +++ (int (*)(struct bpf_map *map, void *value))NULL)); +++ BUILD_BUG_ON(!__same_type(ops->map_peek_elem, +++ (int (*)(struct bpf_map *map, void *value))NULL)); +++ +++ switch (insn->imm) { +++ case BPF_FUNC_map_lookup_elem: +++ insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) - +++ __bpf_call_base; +++ continue; +++ case BPF_FUNC_map_update_elem: +++ insn->imm = BPF_CAST_CALL(ops->map_update_elem) - +++ __bpf_call_base; +++ continue; +++ case BPF_FUNC_map_delete_elem: +++ insn->imm = BPF_CAST_CALL(ops->map_delete_elem) - +++ 
__bpf_call_base; +++ continue; +++ case BPF_FUNC_map_push_elem: +++ insn->imm = BPF_CAST_CALL(ops->map_push_elem) - +++ __bpf_call_base; +++ continue; +++ case BPF_FUNC_map_pop_elem: +++ insn->imm = BPF_CAST_CALL(ops->map_pop_elem) - +++ __bpf_call_base; +++ continue; +++ case BPF_FUNC_map_peek_elem: +++ insn->imm = BPF_CAST_CALL(ops->map_peek_elem) - +++ __bpf_call_base; +++ continue; +++ } +++ +++ goto patch_call_imm; +++ } +++ +++patch_call_imm: +++ fn = env->ops->get_func_proto(insn->imm, env->prog); +++ /* all functions that have prototype and verifier allowed +++ * programs to call them, must be real in-kernel functions +++ */ +++ if (!fn->func) { +++ verbose(env, +++ "kernel subsystem misconfigured func %s#%d\n", +++ func_id_name(insn->imm), insn->imm); +++ return -EFAULT; +++ } +++ insn->imm = fn->func - __bpf_call_base; +++ } +++ +++ return 0; +++} +++ +++static void free_states(struct bpf_verifier_env *env) +++{ +++ struct bpf_verifier_state_list *sl, *sln; ++ int i; ++ +++ sl = env->free_list; +++ while (sl) { +++ sln = sl->next; +++ free_verifier_state(&sl->state, false); +++ kfree(sl); +++ sl = sln; +++ } +++ ++ if (!env->explored_states) ++ return; ++ ++- for (i = 0; i < env->prog->len; i++) { +++ for (i = 0; i < state_htab_size(env); i++) { ++ sl = env->explored_states[i]; ++ ++- if (sl) ++- while (sl != STATE_LIST_MARK) { ++- sln = sl->next; ++- kfree(sl); ++- sl = sln; ++- } +++ while (sl) { +++ sln = sl->next; +++ free_verifier_state(&sl->state, false); +++ kfree(sl); +++ sl = sln; +++ } ++ } ++ ++- kfree(env->explored_states); +++ kvfree(env->explored_states); ++ } ++ ++-int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) +++static void print_verification_stats(struct bpf_verifier_env *env) ++ { ++- char __user *log_ubuf = NULL; ++- struct verifier_env *env; ++- int ret = -EINVAL; +++ int i; ++ ++- if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS) ++- return -E2BIG; +++ if (env->log.level & BPF_LOG_STATS) { +++ verbose(env, "verification time %lld usec\n", +++ div_u64(env->verification_time, 1000)); +++ verbose(env, "stack depth "); +++ for (i = 0; i < env->subprog_cnt; i++) { +++ u32 depth = env->subprog_info[i].stack_depth; +++ +++ verbose(env, "%d", depth); +++ if (i + 1 < env->subprog_cnt) +++ verbose(env, "+"); +++ } +++ verbose(env, "\n"); +++ } +++ verbose(env, "processed %d insns (limit %d) max_states_per_insn %d " +++ "total_states %d peak_states %d mark_read %d\n", +++ env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS, +++ env->max_states_per_insn, env->total_states, +++ env->peak_states, env->longest_mark_read_walk); +++} +++ +++int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, +++ union bpf_attr __user *uattr) +++{ +++ u64 start_time = ktime_get_ns(); +++ struct bpf_verifier_env *env; +++ struct bpf_verifier_log *log; +++ int i, len, ret = -EINVAL; +++ bool is_priv; ++ ++- /* 'struct verifier_env' can be global, but since it's not small, +++ /* no program is valid */ +++ if (ARRAY_SIZE(bpf_verifier_ops) == 0) +++ return -EINVAL; +++ +++ /* 'struct bpf_verifier_env' can be global, but since it's not small, ++ * allocate/free it every time bpf_check() is called ++ */ ++- env = kzalloc(sizeof(struct verifier_env), GFP_KERNEL); +++ env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); ++ if (!env) ++ return -ENOMEM; +++ log = &env->log; ++ +++ len = (*prog)->len; +++ env->insn_aux_data = +++ vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len)); +++ ret = -ENOMEM; +++ if (!env->insn_aux_data) +++ goto err_free_env; +++ for (i = 0; i 
< len; i++) +++ env->insn_aux_data[i].orig_idx = i; ++ env->prog = *prog; +++ env->ops = bpf_verifier_ops[env->prog->type]; +++ is_priv = capable(CAP_SYS_ADMIN); ++ ++ /* grab the mutex to protect few globals used by verifier */ ++- mutex_lock(&bpf_verifier_lock); +++ if (!is_priv) +++ mutex_lock(&bpf_verifier_lock); ++ ++ if (attr->log_level || attr->log_buf || attr->log_size) { ++ /* user requested verbose verifier output ++ * and supplied buffer to store the verification trace ++ */ ++- log_level = attr->log_level; ++- log_ubuf = (char __user *) (unsigned long) attr->log_buf; ++- log_size = attr->log_size; ++- log_len = 0; +++ log->level = attr->log_level; +++ log->ubuf = (char __user *) (unsigned long) attr->log_buf; +++ log->len_total = attr->log_size; ++ ++ ret = -EINVAL; ++- /* log_* values have to be sane */ ++- if (log_size < 128 || log_size > UINT_MAX >> 8 || ++- log_level == 0 || log_ubuf == NULL) ++- goto free_env; ++- ++- ret = -ENOMEM; ++- log_buf = vmalloc(log_size); ++- if (!log_buf) ++- goto free_env; ++- } else { ++- log_level = 0; ++- } +++ /* log attributes have to be sane */ +++ if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 || +++ !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK) +++ goto err_unlock; +++ } +++ +++ env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT); +++ if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) +++ env->strict_alignment = true; +++ if (attr->prog_flags & BPF_F_ANY_ALIGNMENT) +++ env->strict_alignment = false; +++ +++ env->allow_ptr_leaks = is_priv; +++ +++ if (is_priv) +++ env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; ++ ++ ret = replace_map_fd_with_map_ptr(env); ++ if (ret < 0) ++ goto skip_full_check; ++ ++- env->explored_states = kcalloc(env->prog->len, ++- sizeof(struct verifier_state_list *), +++ env->explored_states = kcalloc(state_htab_size(env), +++ sizeof(struct bpf_verifier_state_list *), ++ GFP_USER); ++ ret = -ENOMEM; ++ if (!env->explored_states) ++ goto skip_full_check; ++ ++- ret = check_cfg(env); +++ ret = check_subprogs(env); +++ if (ret < 0) +++ goto skip_full_check; +++ +++ ret = check_btf_info(env, attr, uattr); ++ if (ret < 0) ++ goto skip_full_check; ++ ++- env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); +++ ret = check_cfg(env); +++ if (ret < 0) +++ goto skip_full_check; ++ ++ ret = do_check(env); +++ if (env->cur_state) { +++ free_verifier_state(env->cur_state, true); +++ env->cur_state = NULL; +++ } ++ ++ skip_full_check: ++- while (pop_stack(env, NULL) >= 0); +++ while (!pop_stack(env, NULL, NULL)); ++ free_states(env); ++ ++ if (ret == 0) +++ ret = check_max_stack_depth(env); +++ +++ /* instruction rewrites happen after this point */ +++ if (is_priv) { +++ if (ret == 0) +++ opt_hard_wire_dead_code_branches(env); +++ if (ret == 0) +++ ret = opt_remove_dead_code(env); +++ if (ret == 0) +++ ret = opt_remove_nops(env); +++ } else { +++ if (ret == 0) +++ sanitize_dead_code(env); +++ } +++ +++ if (ret == 0) ++ /* program is valid, convert *(u32*)(ctx + off) accesses */ ++ ret = convert_ctx_accesses(env); ++ ++- if (log_level && log_len >= log_size - 1) { ++- BUG_ON(log_len >= log_size); ++- /* verifier log exceeded user supplied buffer */ ++- ret = -ENOSPC; ++- /* fall through to return what was recorded */ +++ if (ret == 0) +++ ret = fixup_bpf_calls(env); +++ +++ /* do 32-bit optimization after insn patching has done so those patched +++ * insns could be handled correctly. 
+++ */ +++ if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) { +++ ret = opt_subreg_zext_lo32_rnd_hi32(env, attr); +++ env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret +++ : false; ++ } ++ ++- /* copy verifier log back to user space including trailing zero */ ++- if (log_level && copy_to_user(log_ubuf, log_buf, log_len + 1) != 0) { +++ if (ret == 0) +++ ret = fixup_call_args(env); +++ +++ env->verification_time = ktime_get_ns() - start_time; +++ print_verification_stats(env); +++ +++ if (log->level && bpf_verifier_log_full(log)) +++ ret = -ENOSPC; +++ if (log->level && !log->ubuf) { ++ ret = -EFAULT; ++- goto free_log_buf; +++ goto err_release_maps; ++ } ++ ++ if (ret == 0 && env->used_map_cnt) { ++@@ -2297,7 +9580,7 @@ skip_full_check: ++ ++ if (!env->prog->aux->used_maps) { ++ ret = -ENOMEM; ++- goto free_log_buf; +++ goto err_release_maps; ++ } ++ ++ memcpy(env->prog->aux->used_maps, env->used_maps, ++@@ -2310,17 +9593,21 @@ skip_full_check: ++ convert_pseudo_ld_imm64(env); ++ } ++ ++-free_log_buf: ++- if (log_level) ++- vfree(log_buf); ++-free_env: +++ if (ret == 0) +++ adjust_btf_func(env); +++ +++err_release_maps: ++ if (!env->prog->aux->used_maps) ++ /* if we didn't copy map pointers into bpf_prog_info, release ++- * them now. Otherwise free_bpf_prog_info() will release them. +++ * them now. Otherwise free_used_maps() will release them. ++ */ ++ release_maps(env); ++ *prog = env->prog; +++err_unlock: +++ if (!is_priv) +++ mutex_unlock(&bpf_verifier_lock); +++ vfree(env->insn_aux_data); +++err_free_env: ++ kfree(env); ++- mutex_unlock(&bpf_verifier_lock); ++ return ret; ++ } ++--- a/include/linux/filter.h +++++ b/include/linux/filter.h ++@@ -1,3 +1,4 @@ +++/* SPDX-License-Identifier: GPL-2.0 */ ++ /* ++ * Linux Socket Filter Data Structures ++ */ ++@@ -7,16 +8,22 @@ ++ #include ++ ++ #include +++#include ++ #include ++ #include ++ #include ++ #include ++ #include ++ #include ++-#include +++#include +++#include +++#include +++#include +++#include ++ ++-#include +++#include ++ +++#include ++ #include ++ #include ++ ++@@ -24,6 +31,11 @@ struct sk_buff; ++ struct sock; ++ struct seccomp_data; ++ struct bpf_prog_aux; +++struct xdp_rxq_info; +++struct xdp_buff; +++struct sock_reuseport; +++struct ctl_table; +++struct ctl_table_header; ++ ++ /* ArgX, context and stack frame pointer register positions. Note, ++ * Arg1, Arg2, Arg3, etc are used as argument mappings of function ++@@ -40,7 +52,26 @@ struct bpf_prog_aux; ++ /* Additional register mappings for converted user programs. */ ++ #define BPF_REG_A BPF_REG_0 ++ #define BPF_REG_X BPF_REG_7 ++-#define BPF_REG_TMP BPF_REG_8 +++#define BPF_REG_TMP BPF_REG_2 /* scratch reg */ +++#define BPF_REG_D BPF_REG_8 /* data, callee-saved */ +++#define BPF_REG_H BPF_REG_9 /* hlen, callee-saved */ +++ +++/* Kernel hidden auxiliary/helper register. */ +++#define BPF_REG_AX MAX_BPF_REG +++#define MAX_BPF_EXT_REG (MAX_BPF_REG + 1) +++#define MAX_BPF_JIT_REG MAX_BPF_EXT_REG +++ +++/* unused opcode to mark special call to bpf_tail_call() helper */ +++#define BPF_TAIL_CALL 0xf0 +++ +++/* unused opcode to mark call to interpreter with arguments */ +++#define BPF_CALL_ARGS 0xe0 +++ +++/* As per nm, we expose JITed images as text (code) section for +++ * kallsyms. That way, tools like perf can find it to match +++ * addresses. +++ */ +++#define BPF_SYM_ELF_TYPE 't' ++ ++ /* BPF program can access up to 512 bytes of stack space. 
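/*
 * Editorial sketch, not part of the backported diff: the userspace side of
 * the verifier-log attributes that bpf_check() validates above (a supplied
 * buffer must come with a non-zero log_level and a log_size of at least
 * 128).  Minimal loader fragment; the instructions themselves and error
 * handling are omitted, and load_with_verifier_log() is a hypothetical name.
 */
#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int load_with_verifier_log(const struct bpf_insn *insns, unsigned int cnt,
                                  char *log, unsigned int log_size)
{
        union bpf_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
        attr.insns     = (unsigned long)insns;
        attr.insn_cnt  = cnt;
        attr.license   = (unsigned long)"GPL";
        attr.log_level = 1;                     /* basic verifier trace */
        attr.log_size  = log_size;              /* >= 128, see the check above */
        attr.log_buf   = (unsigned long)log;

        return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}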
*/ ++ #define MAX_BPF_STACK 512 ++@@ -129,6 +160,20 @@ struct bpf_prog_aux; ++ .off = 0, \ ++ .imm = IMM }) ++ +++/* Special form of mov32, used for doing explicit zero extension on dst. */ +++#define BPF_ZEXT_REG(DST) \ +++ ((struct bpf_insn) { \ +++ .code = BPF_ALU | BPF_MOV | BPF_X, \ +++ .dst_reg = DST, \ +++ .src_reg = DST, \ +++ .off = 0, \ +++ .imm = 1 }) +++ +++static inline bool insn_is_zext(const struct bpf_insn *insn) +++{ +++ return insn->code == (BPF_ALU | BPF_MOV | BPF_X) && insn->imm == 1; +++} +++ ++ /* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ ++ #define BPF_LD_IMM64(DST, IMM) \ ++ BPF_LD_IMM64_RAW(DST, 0, IMM) ++@@ -249,8 +294,51 @@ struct bpf_prog_aux; ++ .off = OFF, \ ++ .imm = IMM }) ++ +++/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */ +++ +++#define BPF_JMP32_REG(OP, DST, SRC, OFF) \ +++ ((struct bpf_insn) { \ +++ .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ +++ .dst_reg = DST, \ +++ .src_reg = SRC, \ +++ .off = OFF, \ +++ .imm = 0 }) +++ +++/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */ +++ +++#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ +++ ((struct bpf_insn) { \ +++ .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ +++ .dst_reg = DST, \ +++ .src_reg = 0, \ +++ .off = OFF, \ +++ .imm = IMM }) +++ +++/* Unconditional jumps, goto pc + off16 */ +++ +++#define BPF_JMP_A(OFF) \ +++ ((struct bpf_insn) { \ +++ .code = BPF_JMP | BPF_JA, \ +++ .dst_reg = 0, \ +++ .src_reg = 0, \ +++ .off = OFF, \ +++ .imm = 0 }) +++ +++/* Relative call */ +++ +++#define BPF_CALL_REL(TGT) \ +++ ((struct bpf_insn) { \ +++ .code = BPF_JMP | BPF_CALL, \ +++ .dst_reg = 0, \ +++ .src_reg = BPF_PSEUDO_CALL, \ +++ .off = 0, \ +++ .imm = TGT }) +++ ++ /* Function call */ ++ +++#define BPF_CAST_CALL(x) \ +++ ((u64 (*)(u64, u64, u64, u64, u64))(x)) +++ ++ #define BPF_EMIT_CALL(FUNC) \ ++ ((struct bpf_insn) { \ ++ .code = BPF_JMP | BPF_CALL, \ ++@@ -303,6 +391,112 @@ struct bpf_prog_aux; ++ bpf_size; \ ++ }) ++ +++#define bpf_size_to_bytes(bpf_size) \ +++({ \ +++ int bytes = -EINVAL; \ +++ \ +++ if (bpf_size == BPF_B) \ +++ bytes = sizeof(u8); \ +++ else if (bpf_size == BPF_H) \ +++ bytes = sizeof(u16); \ +++ else if (bpf_size == BPF_W) \ +++ bytes = sizeof(u32); \ +++ else if (bpf_size == BPF_DW) \ +++ bytes = sizeof(u64); \ +++ \ +++ bytes; \ +++}) +++ +++#define BPF_SIZEOF(type) \ +++ ({ \ +++ const int __size = bytes_to_bpf_size(sizeof(type)); \ +++ BUILD_BUG_ON(__size < 0); \ +++ __size; \ +++ }) +++ +++#define BPF_FIELD_SIZEOF(type, field) \ +++ ({ \ +++ const int __size = bytes_to_bpf_size(FIELD_SIZEOF(type, field)); \ +++ BUILD_BUG_ON(__size < 0); \ +++ __size; \ +++ }) +++ +++#define BPF_LDST_BYTES(insn) \ +++ ({ \ +++ const int __size = bpf_size_to_bytes(BPF_SIZE((insn)->code)); \ +++ WARN_ON(__size < 0); \ +++ __size; \ +++ }) +++ +++#define __BPF_MAP_0(m, v, ...) v +++#define __BPF_MAP_1(m, v, t, a, ...) m(t, a) +++#define __BPF_MAP_2(m, v, t, a, ...) m(t, a), __BPF_MAP_1(m, v, __VA_ARGS__) +++#define __BPF_MAP_3(m, v, t, a, ...) m(t, a), __BPF_MAP_2(m, v, __VA_ARGS__) +++#define __BPF_MAP_4(m, v, t, a, ...) m(t, a), __BPF_MAP_3(m, v, __VA_ARGS__) +++#define __BPF_MAP_5(m, v, t, a, ...) m(t, a), __BPF_MAP_4(m, v, __VA_ARGS__) +++ +++#define __BPF_REG_0(...) __BPF_PAD(5) +++#define __BPF_REG_1(...) __BPF_MAP(1, __VA_ARGS__), __BPF_PAD(4) +++#define __BPF_REG_2(...) __BPF_MAP(2, __VA_ARGS__), __BPF_PAD(3) +++#define __BPF_REG_3(...) __BPF_MAP(3, __VA_ARGS__), __BPF_PAD(2) +++#define __BPF_REG_4(...) 
__BPF_MAP(4, __VA_ARGS__), __BPF_PAD(1) +++#define __BPF_REG_5(...) __BPF_MAP(5, __VA_ARGS__) +++ +++#define __BPF_MAP(n, ...) __BPF_MAP_##n(__VA_ARGS__) +++#define __BPF_REG(n, ...) __BPF_REG_##n(__VA_ARGS__) +++ +++#define __BPF_CAST(t, a) \ +++ (__force t) \ +++ (__force \ +++ typeof(__builtin_choose_expr(sizeof(t) == sizeof(unsigned long), \ +++ (unsigned long)0, (t)0))) a +++#define __BPF_V void +++#define __BPF_N +++ +++#define __BPF_DECL_ARGS(t, a) t a +++#define __BPF_DECL_REGS(t, a) u64 a +++ +++#define __BPF_PAD(n) \ +++ __BPF_MAP(n, __BPF_DECL_ARGS, __BPF_N, u64, __ur_1, u64, __ur_2, \ +++ u64, __ur_3, u64, __ur_4, u64, __ur_5) +++ +++#define BPF_CALL_x(x, name, ...) \ +++ static __always_inline \ +++ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ +++ u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ +++ u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ +++ { \ +++ return ____##name(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ +++ } \ +++ static __always_inline \ +++ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) +++ +++#define BPF_CALL_0(name, ...) BPF_CALL_x(0, name, __VA_ARGS__) +++#define BPF_CALL_1(name, ...) BPF_CALL_x(1, name, __VA_ARGS__) +++#define BPF_CALL_2(name, ...) BPF_CALL_x(2, name, __VA_ARGS__) +++#define BPF_CALL_3(name, ...) BPF_CALL_x(3, name, __VA_ARGS__) +++#define BPF_CALL_4(name, ...) BPF_CALL_x(4, name, __VA_ARGS__) +++#define BPF_CALL_5(name, ...) BPF_CALL_x(5, name, __VA_ARGS__) +++ +++#define bpf_ctx_range(TYPE, MEMBER) \ +++ offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 +++#define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2) \ +++ offsetof(TYPE, MEMBER1) ... offsetofend(TYPE, MEMBER2) - 1 +++#if BITS_PER_LONG == 64 +++# define bpf_ctx_range_ptr(TYPE, MEMBER) \ +++ offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1 +++#else +++# define bpf_ctx_range_ptr(TYPE, MEMBER) \ +++ offsetof(TYPE, MEMBER) ... offsetof(TYPE, MEMBER) + 8 - 1 +++#endif /* BITS_PER_LONG == 64 */ +++ +++#define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE) \ +++ ({ \ +++ BUILD_BUG_ON(FIELD_SIZEOF(TYPE, MEMBER) != (SIZE)); \ +++ *(PTR_SIZE) = (SIZE); \ +++ offsetof(TYPE, MEMBER); \ +++ }) +++ ++ #ifdef CONFIG_COMPAT ++ /* A struct sock_filter is architecture independent. */ ++ struct compat_sock_fprog { ++@@ -317,24 +511,33 @@ struct sock_fprog_kern { ++ }; ++ ++ struct bpf_binary_header { ++- unsigned int pages; ++- u8 image[]; +++ u32 pages; +++ /* Some arches need word alignment for their instructions */ +++ u8 image[] __aligned(4); ++ }; ++ ++ struct bpf_prog { ++ u16 pages; /* Number of allocated pages */ ++- kmemcheck_bitfield_begin(meta); ++ u16 jited:1, /* Is our filter JIT'ed? */ +++ jit_requested:1,/* archs need to JIT the prog */ +++ undo_set_mem:1, /* Passed set_memory_ro() checkpoint */ ++ gpl_compatible:1, /* Is filter GPL compatible? */ ++ cb_access:1, /* Is control block accessed? */ ++- dst_needed:1; /* Do we need dst entry? */ ++- kmemcheck_bitfield_end(meta); ++- u32 len; /* Number of filter blocks */ +++ dst_needed:1, /* Do we need dst entry? */ +++ blinded:1, /* Was blinded */ +++ is_func:1, /* program is a bpf function */ +++ kprobe_override:1, /* Do we override a kprobe? */ +++ has_callchain_buf:1, /* callchain buffer allocated? 
*/ +++ enforce_expected_attach_type:1; /* Enforce expected_attach_type checking at attach time */ ++ enum bpf_prog_type type; /* Type of BPF program */ +++ enum bpf_attach_type expected_attach_type; /* For some prog types */ +++ u32 len; /* Number of filter blocks */ +++ u32 jited_len; /* Size of jited insns in bytes */ +++ u8 tag[BPF_TAG_SIZE]; ++ struct bpf_prog_aux *aux; /* Auxiliary fields */ ++ struct sock_fprog_kern *orig_prog; /* Original BPF program */ ++- unsigned int (*bpf_func)(const struct sk_buff *skb, ++- const struct bpf_insn *filter); +++ unsigned int (*bpf_func)(const void *ctx, +++ const struct bpf_insn *insn); ++ /* Instructions for interpreter */ ++ union { ++ struct sock_filter insns[0]; ++@@ -343,44 +546,160 @@ struct bpf_prog { ++ }; ++ ++ struct sk_filter { ++- atomic_t refcnt; +++ refcount_t refcnt; ++ struct rcu_head rcu; ++ struct bpf_prog *prog; ++ }; ++ ++-#define BPF_PROG_RUN(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) +++#define BPF_PROG_RUN(prog, ctx) ({ \ +++ u32 ret; \ +++ ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi); \ +++ ret; }) +++ +++#define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN +++ +++struct bpf_skb_data_end { +++ struct qdisc_skb_cb qdisc_cb; +++ void *data_meta; +++ void *data_end; +++}; ++ ++-static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, ++- struct sk_buff *skb) +++struct bpf_redirect_info { +++ u32 flags; +++ u32 tgt_index; +++ void *tgt_value; +++ struct bpf_map *map; +++ struct bpf_map *map_to_flush; +++ u32 kern_flags; +++}; +++ +++DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); +++ +++/* flags for bpf_redirect_info kern_flags */ +++#define BPF_RI_F_RF_NO_DIRECT BIT(0) /* no napi_direct on return_frame */ +++ +++/* Compute the linear packet data range [data, data_end) which +++ * will be accessed by various program types (cls_bpf, act_bpf, +++ * lwt, ...). Subsystems allowing direct data access must (!) +++ * ensure that cb[] area can be written to when BPF program is +++ * invoked (otherwise cb[] save/restore is necessary). +++ */ +++static inline void bpf_compute_data_pointers(struct sk_buff *skb) ++ { ++- u8 *cb_data = qdisc_skb_cb(skb)->data; ++- u8 saved_cb[QDISC_CB_PRIV_LEN]; ++- u32 res; +++ struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; +++ +++ BUILD_BUG_ON(sizeof(*cb) > FIELD_SIZEOF(struct sk_buff, cb)); +++ cb->data_meta = skb->data; +++ cb->data_end = skb->data + skb_headlen(skb); +++} ++ +++/* Similar to bpf_compute_data_pointers(), except that save orginal +++ * data in cb->data and cb->meta_data for restore. +++ */ +++static inline void bpf_compute_and_save_data_end( +++ struct sk_buff *skb, void **saved_data_end) +++{ +++ struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; +++ +++ *saved_data_end = cb->data_end; +++ cb->data_end = skb->data + skb_headlen(skb); +++} +++ +++/* Restore data saved by bpf_compute_data_pointers(). */ +++static inline void bpf_restore_data_end( +++ struct sk_buff *skb, void *saved_data_end) +++{ +++ struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; +++ +++ cb->data_end = saved_data_end; +++} +++ +++static inline u8 *bpf_skb_cb(struct sk_buff *skb) +++{ +++ /* eBPF programs may read/write skb->cb[] area to transfer meta +++ * data between tail calls. Since this also needs to work with +++ * tc, that scratch memory is mapped to qdisc_skb_cb's data area. +++ * +++ * In some socket filter cases, the cb unfortunately needs to be +++ * saved/restored so that protocol specific skb->cb[] data won't +++ * be lost. 
In any case, due to unpriviledged eBPF programs +++ * attached to sockets, we need to clear the bpf_skb_cb() area +++ * to not leak previous contents to user space. +++ */ +++ BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) != BPF_SKB_CB_LEN); ++ BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) != ++- QDISC_CB_PRIV_LEN); +++ FIELD_SIZEOF(struct qdisc_skb_cb, data)); +++ +++ return qdisc_skb_cb(skb)->data; +++} +++ +++static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog, +++ struct sk_buff *skb) +++{ +++ u8 *cb_data = bpf_skb_cb(skb); +++ u8 cb_saved[BPF_SKB_CB_LEN]; +++ u32 res; ++ ++ if (unlikely(prog->cb_access)) { ++- memcpy(saved_cb, cb_data, sizeof(saved_cb)); ++- memset(cb_data, 0, sizeof(saved_cb)); +++ memcpy(cb_saved, cb_data, sizeof(cb_saved)); +++ memset(cb_data, 0, sizeof(cb_saved)); ++ } ++ ++ res = BPF_PROG_RUN(prog, skb); ++ ++ if (unlikely(prog->cb_access)) ++- memcpy(cb_data, saved_cb, sizeof(saved_cb)); +++ memcpy(cb_data, cb_saved, sizeof(cb_saved)); +++ +++ return res; +++} +++ +++static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, +++ struct sk_buff *skb) +++{ +++ u32 res; ++ +++ preempt_disable(); +++ res = __bpf_prog_run_save_cb(prog, skb); +++ preempt_enable(); ++ return res; ++ } ++ ++ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, ++ struct sk_buff *skb) ++ { ++- u8 *cb_data = qdisc_skb_cb(skb)->data; +++ u8 *cb_data = bpf_skb_cb(skb); +++ u32 res; ++ ++ if (unlikely(prog->cb_access)) ++- memset(cb_data, 0, QDISC_CB_PRIV_LEN); ++- return BPF_PROG_RUN(prog, skb); +++ memset(cb_data, 0, BPF_SKB_CB_LEN); +++ +++ preempt_disable(); +++ res = BPF_PROG_RUN(prog, skb); +++ preempt_enable(); +++ return res; +++} +++ +++static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, +++ struct xdp_buff *xdp) +++{ +++ /* Caller needs to hold rcu_read_lock() (!), otherwise program +++ * can be released while still running, or map elements could be +++ * freed early while still having concurrent users. XDP fastpath +++ * already takes rcu_read_lock() when fetching the program, so +++ * it's not necessary here anymore. 
+++ */ +++ return BPF_PROG_RUN(prog, xdp); +++} +++ +++static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog) +++{ +++ return prog->len * sizeof(struct bpf_insn); +++} +++ +++static inline u32 bpf_prog_tag_scratch_size(const struct bpf_prog *prog) +++{ +++ return round_up(bpf_prog_insn_size(prog) + +++ sizeof(__be64) + 1, SHA_MESSAGE_BYTES); ++ } ++ ++ static inline unsigned int bpf_prog_size(unsigned int proglen) ++@@ -399,27 +718,77 @@ static inline bool bpf_prog_was_classic( ++ return prog->type == BPF_PROG_TYPE_UNSPEC; ++ } ++ +++static inline u32 bpf_ctx_off_adjust_machine(u32 size) +++{ +++ const u32 size_machine = sizeof(unsigned long); +++ +++ if (size > size_machine && size % size_machine == 0) +++ size = size_machine; +++ +++ return size; +++} +++ +++static inline bool +++bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default) +++{ +++ return size <= size_default && (size & (size - 1)) == 0; +++} +++ +++static inline u8 +++bpf_ctx_narrow_access_offset(u32 off, u32 size, u32 size_default) +++{ +++ u8 access_off = off & (size_default - 1); +++ +++#ifdef __LITTLE_ENDIAN +++ return access_off; +++#else +++ return size_default - (access_off + size); +++#endif +++} +++ +++#define bpf_ctx_wide_access_ok(off, size, type, field) \ +++ (size == sizeof(__u64) && \ +++ off >= offsetof(type, field) && \ +++ off + sizeof(__u64) <= offsetofend(type, field) && \ +++ off % sizeof(__u64) == 0) +++ ++ #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) ++ ++-#ifdef CONFIG_DEBUG_SET_MODULE_RONX ++ static inline void bpf_prog_lock_ro(struct bpf_prog *fp) ++ { ++- set_memory_ro((unsigned long)fp, fp->pages); +++#ifndef CONFIG_BPF_JIT_ALWAYS_ON +++ if (!fp->jited) { +++ fp->undo_set_mem = 1; +++ set_memory_ro((unsigned long)fp, fp->pages); +++ } +++#endif ++ } ++ ++ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) ++ { ++- set_memory_rw((unsigned long)fp, fp->pages); +++ if (fp->undo_set_mem) +++ set_memory_rw((unsigned long)fp, fp->pages); ++ } ++-#else ++-static inline void bpf_prog_lock_ro(struct bpf_prog *fp) +++ +++static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) ++ { +++ set_memory_ro((unsigned long)hdr, hdr->pages); +++ set_memory_x((unsigned long)hdr, hdr->pages); ++ } ++ ++-static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) +++static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) +++{ +++ set_memory_rw((unsigned long)hdr, hdr->pages); +++} +++ +++static inline struct bpf_binary_header * +++bpf_jit_binary_hdr(const struct bpf_prog *fp) ++ { +++ unsigned long real_start = (unsigned long)fp->bpf_func; +++ unsigned long addr = real_start & PAGE_MASK; +++ +++ return (void *)addr; ++ } ++-#endif /* CONFIG_DEBUG_SET_MODULE_RONX */ ++ ++ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); ++ static inline int sk_filter(struct sock *sk, struct sk_buff *skb) ++@@ -427,10 +796,20 @@ static inline int sk_filter(struct sock ++ return sk_filter_trim_cap(sk, skb, 1); ++ } ++ ++-int bpf_prog_select_runtime(struct bpf_prog *fp); +++struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err); ++ void bpf_prog_free(struct bpf_prog *fp); ++ +++bool bpf_opcode_in_insntable(u8 code); +++ +++void bpf_prog_free_linfo(struct bpf_prog *prog); +++void bpf_prog_fill_jited_linfo(struct bpf_prog *prog, +++ const u32 *insn_to_jit_off); +++int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog); +++void bpf_prog_free_jited_linfo(struct bpf_prog *prog); +++void 
bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog); +++ ++ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); +++struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags); ++ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, ++ gfp_t gfp_extra_flags); ++ void __bpf_prog_free(struct bpf_prog *fp); ++@@ -450,12 +829,11 @@ int bpf_prog_create_from_user(struct bpf ++ void bpf_prog_destroy(struct bpf_prog *fp); ++ ++ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); ++-int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, ++- bool locked); ++ int sk_attach_bpf(u32 ufd, struct sock *sk); +++int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk); +++int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk); +++void sk_reuseport_prog_free(struct bpf_prog *prog); ++ int sk_detach_filter(struct sock *sk); ++-int __sk_detach_filter(struct sock *sk, bool locked); ++- ++ int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, ++ unsigned int len); ++ ++@@ -463,10 +841,100 @@ bool sk_filter_charge(struct sock *sk, s ++ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); ++ ++ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); ++-void bpf_int_jit_compile(struct bpf_prog *fp); ++-bool bpf_helper_changes_skb_data(void *func); +++#define __bpf_call_base_args \ +++ ((u64 (*)(u64, u64, u64, u64, u64, const struct bpf_insn *)) \ +++ (void *)__bpf_call_base) +++ +++struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); +++void bpf_jit_compile(struct bpf_prog *prog); +++bool bpf_jit_needs_zext(void); +++bool bpf_helper_changes_pkt_data(void *func); +++ +++static inline bool bpf_dump_raw_ok(const struct cred *cred) +++{ +++ /* Reconstruction of call-sites is dependent on kallsyms, +++ * thus make dump the same restriction. +++ */ +++ return true; +++} +++ +++struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, +++ const struct bpf_insn *patch, u32 len); +++int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt); +++ +++void bpf_clear_redirect_map(struct bpf_map *map); +++ +++static inline bool xdp_return_frame_no_direct(void) +++{ +++ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); +++ +++ return ri->kern_flags & BPF_RI_F_RF_NO_DIRECT; +++} +++ +++static inline void xdp_set_return_frame_no_direct(void) +++{ +++ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); +++ +++ ri->kern_flags |= BPF_RI_F_RF_NO_DIRECT; +++} +++ +++static inline void xdp_clear_return_frame_no_direct(void) +++{ +++ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); +++ +++ ri->kern_flags &= ~BPF_RI_F_RF_NO_DIRECT; +++} +++ +++static inline int xdp_ok_fwd_dev(const struct net_device *fwd, +++ unsigned int pktlen) +++{ +++ unsigned int len; +++ +++ if (unlikely(!(fwd->flags & IFF_UP))) +++ return -ENETDOWN; +++ +++ len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; +++ if (pktlen > len) +++ return -EMSGSIZE; +++ +++ return 0; +++} +++ +++/* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the +++ * same cpu context. Further for best results no more than a single map +++ * for the do_redirect/do_flush pair should be used. This limitation is +++ * because we only track one map and force a flush when the map changes. +++ * This does not appear to be a real limitation for existing software. 
+++ */ +++int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, +++ struct xdp_buff *xdp, struct bpf_prog *prog); +++int xdp_do_redirect(struct net_device *dev, +++ struct xdp_buff *xdp, +++ struct bpf_prog *prog); +++void xdp_do_flush_map(void); +++ +++void bpf_warn_invalid_xdp_action(u32 act); +++ +++#ifdef CONFIG_INET +++struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, +++ struct bpf_prog *prog, struct sk_buff *skb, +++ u32 hash); +++#else +++static inline struct sock * +++bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, +++ struct bpf_prog *prog, struct sk_buff *skb, +++ u32 hash) +++{ +++ return NULL; +++} +++#endif ++ ++ #ifdef CONFIG_BPF_JIT +++extern int bpf_jit_enable; +++extern int bpf_jit_harden; +++extern int bpf_jit_kallsyms; +++extern long bpf_jit_limit; +++ ++ typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); ++ ++ struct bpf_binary_header * ++@@ -474,10 +942,18 @@ bpf_jit_binary_alloc(unsigned int progle ++ unsigned int alignment, ++ bpf_jit_fill_hole_t bpf_fill_ill_insns); ++ void bpf_jit_binary_free(struct bpf_binary_header *hdr); ++- ++-void bpf_jit_compile(struct bpf_prog *fp); +++u64 bpf_jit_alloc_exec_limit(void); +++void *bpf_jit_alloc_exec(unsigned long size); +++void bpf_jit_free_exec(void *addr); ++ void bpf_jit_free(struct bpf_prog *fp); ++ +++int bpf_jit_get_func_addr(const struct bpf_prog *prog, +++ const struct bpf_insn *insn, bool extra_pass, +++ u64 *func_addr, bool *func_addr_fixed); +++ +++struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp); +++void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other); +++ ++ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, ++ u32 pass, void *image) ++ { ++@@ -488,17 +964,144 @@ static inline void bpf_jit_dump(unsigned ++ print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET, ++ 16, 1, image, proglen, false); ++ } ++-#else ++-static inline void bpf_jit_compile(struct bpf_prog *fp) +++ +++static inline bool bpf_jit_is_ebpf(void) +++{ +++# ifdef CONFIG_HAVE_EBPF_JIT +++ return true; +++# else +++ return false; +++# endif +++} +++ +++static inline bool ebpf_jit_enabled(void) +++{ +++ return bpf_jit_enable && bpf_jit_is_ebpf(); +++} +++ +++static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) +++{ +++ return fp->jited && bpf_jit_is_ebpf(); +++} +++ +++static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog) +++{ +++ /* These are the prerequisites, should someone ever have the +++ * idea to call blinding outside of them, we make sure to +++ * bail out. +++ */ +++ if (!bpf_jit_is_ebpf()) +++ return false; +++ if (!prog->jit_requested) +++ return false; +++ if (!bpf_jit_harden) +++ return false; +++ if (bpf_jit_harden == 1 && capable(CAP_SYS_ADMIN)) +++ return false; +++ +++ return true; +++} +++ +++static inline bool bpf_jit_kallsyms_enabled(void) ++ { +++ /* There are a couple of corner cases where kallsyms should +++ * not be enabled f.e. on hardening. 
+++ */ +++ if (bpf_jit_harden) +++ return false; +++ if (!bpf_jit_kallsyms) +++ return false; +++ if (bpf_jit_kallsyms == 1) +++ return true; +++ +++ return false; +++} +++ +++const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, +++ unsigned long *off, char *sym); +++bool is_bpf_text_address(unsigned long addr); +++int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, +++ char *sym); +++ +++static inline const char * +++bpf_address_lookup(unsigned long addr, unsigned long *size, +++ unsigned long *off, char **modname, char *sym) +++{ +++ const char *ret = __bpf_address_lookup(addr, size, off, sym); +++ +++ if (ret && modname) +++ *modname = NULL; +++ return ret; +++} +++ +++void bpf_prog_kallsyms_add(struct bpf_prog *fp); +++void bpf_prog_kallsyms_del(struct bpf_prog *fp); +++void bpf_get_prog_name(const struct bpf_prog *prog, char *sym); +++ +++#else /* CONFIG_BPF_JIT */ +++ +++static inline bool ebpf_jit_enabled(void) +++{ +++ return false; +++} +++ +++static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) +++{ +++ return false; ++ } ++ ++ static inline void bpf_jit_free(struct bpf_prog *fp) ++ { ++ bpf_prog_unlock_free(fp); ++ } +++ +++static inline bool bpf_jit_kallsyms_enabled(void) +++{ +++ return false; +++} +++ +++static inline const char * +++__bpf_address_lookup(unsigned long addr, unsigned long *size, +++ unsigned long *off, char *sym) +++{ +++ return NULL; +++} +++ +++static inline bool is_bpf_text_address(unsigned long addr) +++{ +++ return false; +++} +++ +++static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value, +++ char *type, char *sym) +++{ +++ return -ERANGE; +++} +++ +++static inline const char * +++bpf_address_lookup(unsigned long addr, unsigned long *size, +++ unsigned long *off, char **modname, char *sym) +++{ +++ return NULL; +++} +++ +++static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp) +++{ +++} +++ +++static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp) +++{ +++} +++ +++static inline void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) +++{ +++ sym[0] = '\0'; +++} +++ ++ #endif /* CONFIG_BPF_JIT */ ++ +++void bpf_prog_kallsyms_del_all(struct bpf_prog *fp); +++ ++ #define BPF_ANC BIT(15) ++ ++ static inline bool bpf_needs_clear_a(const struct sock_filter *first) ++@@ -571,4 +1174,59 @@ static inline int bpf_tell_extensions(vo ++ return SKF_AD_MAX; ++ } ++ +++struct bpf_sock_addr_kern { +++ struct sock *sk; +++ struct sockaddr *uaddr; +++ /* Temporary "register" to make indirect stores to nested structures +++ * defined above. We need three registers to make such a store, but +++ * only two (src and dst) are available at convert_ctx_access time +++ */ +++ u64 tmp_reg; +++ void *t_ctx; /* Attach type specific context. */ +++}; +++ +++struct bpf_sock_ops_kern { +++ struct sock *sk; +++ u32 op; +++ union { +++ u32 args[4]; +++ u32 reply; +++ u32 replylong[4]; +++ }; +++ u32 is_fullsock; +++ u64 temp; /* temp and everything after is not +++ * initialized to 0 before calling +++ * the BPF program. New fields that +++ * should be initialized to 0 should +++ * be inserted before temp. +++ * temp is scratch storage used by +++ * sock_ops_convert_ctx_access +++ * as temporary storage of a register. 
+++ */ +++}; +++ +++struct bpf_sysctl_kern { +++ struct ctl_table_header *head; +++ struct ctl_table *table; +++ void *cur_val; +++ size_t cur_len; +++ void *new_val; +++ size_t new_len; +++ int new_updated; +++ int write; +++ loff_t *ppos; +++ /* Temporary "register" for indirect stores to ppos. */ +++ u64 tmp_reg; +++}; +++ +++struct bpf_sockopt_kern { +++ struct sock *sk; +++ u8 *optval; +++ u8 *optval_end; +++ s32 level; +++ s32 optname; +++ s32 optlen; +++ s32 retval; +++}; +++ ++ #endif /* __LINUX_FILTER_H__ */ ++--- /dev/null +++++ b/include/linux/set_memory.h ++@@ -0,0 +1,47 @@ +++/* SPDX-License-Identifier: GPL-2.0-only */ +++/* +++ * Copyright 2017, Michael Ellerman, IBM Corporation. +++ */ +++#ifndef _LINUX_SET_MEMORY_H_ +++#define _LINUX_SET_MEMORY_H_ +++ +++#include +++ +++#ifndef CONFIG_ARCH_HAS_SET_DIRECT_MAP +++static inline int set_direct_map_invalid_noflush(struct page *page) +++{ +++ return 0; +++} +++static inline int set_direct_map_default_noflush(struct page *page) +++{ +++ return 0; +++} +++#endif +++ +++#ifndef set_mce_nospec +++static inline int set_mce_nospec(unsigned long pfn, bool unmap) +++{ +++ return 0; +++} +++#endif +++ +++#ifndef clear_mce_nospec +++static inline int clear_mce_nospec(unsigned long pfn) +++{ +++ return 0; +++} +++#endif +++ +++#ifndef CONFIG_ARCH_HAS_MEM_ENCRYPT +++static inline int set_memory_encrypted(unsigned long addr, int numpages) +++{ +++ return 0; +++} +++ +++static inline int set_memory_decrypted(unsigned long addr, int numpages) +++{ +++ return 0; +++} +++#endif /* CONFIG_ARCH_HAS_MEM_ENCRYPT */ +++ +++#endif /* _LINUX_SET_MEMORY_H_ */ ++--- /dev/null +++++ b/include/trace/events/xdp.h ++@@ -0,0 +1,407 @@ +++/* SPDX-License-Identifier: GPL-2.0 */ +++#undef TRACE_SYSTEM +++#define TRACE_SYSTEM xdp +++ +++#if !defined(_TRACE_XDP_H) || defined(TRACE_HEADER_MULTI_READ) +++#define _TRACE_XDP_H +++ +++#include +++#include +++#include +++#include +++ +++#define __XDP_ACT_MAP(FN) \ +++ FN(ABORTED) \ +++ FN(DROP) \ +++ FN(PASS) \ +++ FN(TX) \ +++ FN(REDIRECT) +++ +++#define __XDP_ACT_TP_FN(x) \ +++ TRACE_DEFINE_ENUM(XDP_##x); +++#define __XDP_ACT_SYM_FN(x) \ +++ { XDP_##x, #x }, +++#define __XDP_ACT_SYM_TAB \ +++ __XDP_ACT_MAP(__XDP_ACT_SYM_FN) { -1, 0 } +++__XDP_ACT_MAP(__XDP_ACT_TP_FN) +++ +++TRACE_EVENT(xdp_exception, +++ +++ TP_PROTO(const struct net_device *dev, +++ const struct bpf_prog *xdp, u32 act), +++ +++ TP_ARGS(dev, xdp, act), +++ +++ TP_STRUCT__entry( +++ __field(int, prog_id) +++ __field(u32, act) +++ __field(int, ifindex) +++ ), +++ +++ TP_fast_assign( +++ __entry->prog_id = xdp->aux->id; +++ __entry->act = act; +++ __entry->ifindex = dev->ifindex; +++ ), +++ +++ TP_printk("prog_id=%d action=%s ifindex=%d", +++ __entry->prog_id, +++ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), +++ __entry->ifindex) +++); +++ +++TRACE_EVENT(xdp_bulk_tx, +++ +++ TP_PROTO(const struct net_device *dev, +++ int sent, int drops, int err), +++ +++ TP_ARGS(dev, sent, drops, err), +++ +++ TP_STRUCT__entry( +++ __field(int, ifindex) +++ __field(u32, act) +++ __field(int, drops) +++ __field(int, sent) +++ __field(int, err) +++ ), +++ +++ TP_fast_assign( +++ __entry->ifindex = dev->ifindex; +++ __entry->act = XDP_TX; +++ __entry->drops = drops; +++ __entry->sent = sent; +++ __entry->err = err; +++ ), +++ +++ TP_printk("ifindex=%d action=%s sent=%d drops=%d err=%d", +++ __entry->ifindex, +++ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), +++ __entry->sent, __entry->drops, __entry->err) +++); +++ +++DECLARE_EVENT_CLASS(xdp_redirect_template, 
+++ +++ TP_PROTO(const struct net_device *dev, +++ const struct bpf_prog *xdp, +++ int to_ifindex, int err, +++ const struct bpf_map *map, u32 map_index), +++ +++ TP_ARGS(dev, xdp, to_ifindex, err, map, map_index), +++ +++ TP_STRUCT__entry( +++ __field(int, prog_id) +++ __field(u32, act) +++ __field(int, ifindex) +++ __field(int, err) +++ __field(int, to_ifindex) +++ __field(u32, map_id) +++ __field(int, map_index) +++ ), +++ +++ TP_fast_assign( +++ __entry->prog_id = xdp->aux->id; +++ __entry->act = XDP_REDIRECT; +++ __entry->ifindex = dev->ifindex; +++ __entry->err = err; +++ __entry->to_ifindex = to_ifindex; +++ __entry->map_id = map ? map->id : 0; +++ __entry->map_index = map_index; +++ ), +++ +++ TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d", +++ __entry->prog_id, +++ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), +++ __entry->ifindex, __entry->to_ifindex, +++ __entry->err) +++); +++ +++DEFINE_EVENT(xdp_redirect_template, xdp_redirect, +++ TP_PROTO(const struct net_device *dev, +++ const struct bpf_prog *xdp, +++ int to_ifindex, int err, +++ const struct bpf_map *map, u32 map_index), +++ TP_ARGS(dev, xdp, to_ifindex, err, map, map_index) +++); +++ +++DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err, +++ TP_PROTO(const struct net_device *dev, +++ const struct bpf_prog *xdp, +++ int to_ifindex, int err, +++ const struct bpf_map *map, u32 map_index), +++ TP_ARGS(dev, xdp, to_ifindex, err, map, map_index) +++); +++ +++#define _trace_xdp_redirect(dev, xdp, to) \ +++ trace_xdp_redirect(dev, xdp, to, 0, NULL, 0); +++ +++#define _trace_xdp_redirect_err(dev, xdp, to, err) \ +++ trace_xdp_redirect_err(dev, xdp, to, err, NULL, 0); +++ +++DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map, +++ TP_PROTO(const struct net_device *dev, +++ const struct bpf_prog *xdp, +++ int to_ifindex, int err, +++ const struct bpf_map *map, u32 map_index), +++ TP_ARGS(dev, xdp, to_ifindex, err, map, map_index), +++ TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d" +++ " map_id=%d map_index=%d", +++ __entry->prog_id, +++ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), +++ __entry->ifindex, __entry->to_ifindex, +++ __entry->err, +++ __entry->map_id, __entry->map_index) +++); +++ +++DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err, +++ TP_PROTO(const struct net_device *dev, +++ const struct bpf_prog *xdp, +++ int to_ifindex, int err, +++ const struct bpf_map *map, u32 map_index), +++ TP_ARGS(dev, xdp, to_ifindex, err, map, map_index), +++ TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d" +++ " map_id=%d map_index=%d", +++ __entry->prog_id, +++ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), +++ __entry->ifindex, __entry->to_ifindex, +++ __entry->err, +++ __entry->map_id, __entry->map_index) +++); +++ +++#ifndef __DEVMAP_OBJ_TYPE +++#define __DEVMAP_OBJ_TYPE +++struct _bpf_dtab_netdev { +++ struct net_device *dev; +++}; +++#endif /* __DEVMAP_OBJ_TYPE */ +++ +++#define devmap_ifindex(fwd, map) \ +++ ((map->map_type == BPF_MAP_TYPE_DEVMAP || \ +++ map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) ? 
\ +++ ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0) +++ +++#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ +++ trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \ +++ 0, map, idx) +++ +++#define _trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err) \ +++ trace_xdp_redirect_map_err(dev, xdp, devmap_ifindex(fwd, map), \ +++ err, map, idx) +++ +++TRACE_EVENT(xdp_cpumap_kthread, +++ +++ TP_PROTO(int map_id, unsigned int processed, unsigned int drops, +++ int sched), +++ +++ TP_ARGS(map_id, processed, drops, sched), +++ +++ TP_STRUCT__entry( +++ __field(int, map_id) +++ __field(u32, act) +++ __field(int, cpu) +++ __field(unsigned int, drops) +++ __field(unsigned int, processed) +++ __field(int, sched) +++ ), +++ +++ TP_fast_assign( +++ __entry->map_id = map_id; +++ __entry->act = XDP_REDIRECT; +++ __entry->cpu = smp_processor_id(); +++ __entry->drops = drops; +++ __entry->processed = processed; +++ __entry->sched = sched; +++ ), +++ +++ TP_printk("kthread" +++ " cpu=%d map_id=%d action=%s" +++ " processed=%u drops=%u" +++ " sched=%d", +++ __entry->cpu, __entry->map_id, +++ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), +++ __entry->processed, __entry->drops, +++ __entry->sched) +++); +++ +++TRACE_EVENT(xdp_cpumap_enqueue, +++ +++ TP_PROTO(int map_id, unsigned int processed, unsigned int drops, +++ int to_cpu), +++ +++ TP_ARGS(map_id, processed, drops, to_cpu), +++ +++ TP_STRUCT__entry( +++ __field(int, map_id) +++ __field(u32, act) +++ __field(int, cpu) +++ __field(unsigned int, drops) +++ __field(unsigned int, processed) +++ __field(int, to_cpu) +++ ), +++ +++ TP_fast_assign( +++ __entry->map_id = map_id; +++ __entry->act = XDP_REDIRECT; +++ __entry->cpu = smp_processor_id(); +++ __entry->drops = drops; +++ __entry->processed = processed; +++ __entry->to_cpu = to_cpu; +++ ), +++ +++ TP_printk("enqueue" +++ " cpu=%d map_id=%d action=%s" +++ " processed=%u drops=%u" +++ " to_cpu=%d", +++ __entry->cpu, __entry->map_id, +++ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), +++ __entry->processed, __entry->drops, +++ __entry->to_cpu) +++); +++ +++TRACE_EVENT(xdp_devmap_xmit, +++ +++ TP_PROTO(const struct bpf_map *map, u32 map_index, +++ int sent, int drops, +++ const struct net_device *from_dev, +++ const struct net_device *to_dev, int err), +++ +++ TP_ARGS(map, map_index, sent, drops, from_dev, to_dev, err), +++ +++ TP_STRUCT__entry( +++ __field(int, map_id) +++ __field(u32, act) +++ __field(u32, map_index) +++ __field(int, drops) +++ __field(int, sent) +++ __field(int, from_ifindex) +++ __field(int, to_ifindex) +++ __field(int, err) +++ ), +++ +++ TP_fast_assign( +++ __entry->map_id = map->id; +++ __entry->act = XDP_REDIRECT; +++ __entry->map_index = map_index; +++ __entry->drops = drops; +++ __entry->sent = sent; +++ __entry->from_ifindex = from_dev->ifindex; +++ __entry->to_ifindex = to_dev->ifindex; +++ __entry->err = err; +++ ), +++ +++ TP_printk("ndo_xdp_xmit" +++ " map_id=%d map_index=%d action=%s" +++ " sent=%d drops=%d" +++ " from_ifindex=%d to_ifindex=%d err=%d", +++ __entry->map_id, __entry->map_index, +++ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), +++ __entry->sent, __entry->drops, +++ __entry->from_ifindex, __entry->to_ifindex, __entry->err) +++); +++ +++/* Expect users already include , but not xdp_priv.h */ +++#include +++ +++#define __MEM_TYPE_MAP(FN) \ +++ FN(PAGE_SHARED) \ +++ FN(PAGE_ORDER0) \ +++ FN(PAGE_POOL) \ +++ FN(ZERO_COPY) +++ +++#define __MEM_TYPE_TP_FN(x) \ +++ TRACE_DEFINE_ENUM(MEM_TYPE_##x); +++#define __MEM_TYPE_SYM_FN(x) \ 
+++ { MEM_TYPE_##x, #x }, +++#define __MEM_TYPE_SYM_TAB \ +++ __MEM_TYPE_MAP(__MEM_TYPE_SYM_FN) { -1, 0 } +++__MEM_TYPE_MAP(__MEM_TYPE_TP_FN) +++ +++TRACE_EVENT(mem_disconnect, +++ +++ TP_PROTO(const struct xdp_mem_allocator *xa), +++ +++ TP_ARGS(xa), +++ +++ TP_STRUCT__entry( +++ __field(const struct xdp_mem_allocator *, xa) +++ __field(u32, mem_id) +++ __field(u32, mem_type) +++ __field(const void *, allocator) +++ ), +++ +++ TP_fast_assign( +++ __entry->xa = xa; +++ __entry->mem_id = xa->mem.id; +++ __entry->mem_type = xa->mem.type; +++ __entry->allocator = xa->allocator; +++ ), +++ +++ TP_printk("mem_id=%d mem_type=%s allocator=%p", +++ __entry->mem_id, +++ __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), +++ __entry->allocator +++ ) +++); +++ +++TRACE_EVENT(mem_connect, +++ +++ TP_PROTO(const struct xdp_mem_allocator *xa, +++ const struct xdp_rxq_info *rxq), +++ +++ TP_ARGS(xa, rxq), +++ +++ TP_STRUCT__entry( +++ __field(const struct xdp_mem_allocator *, xa) +++ __field(u32, mem_id) +++ __field(u32, mem_type) +++ __field(const void *, allocator) +++ __field(const struct xdp_rxq_info *, rxq) +++ __field(int, ifindex) +++ ), +++ +++ TP_fast_assign( +++ __entry->xa = xa; +++ __entry->mem_id = xa->mem.id; +++ __entry->mem_type = xa->mem.type; +++ __entry->allocator = xa->allocator; +++ __entry->rxq = rxq; +++ __entry->ifindex = rxq->dev->ifindex; +++ ), +++ +++ TP_printk("mem_id=%d mem_type=%s allocator=%p" +++ " ifindex=%d", +++ __entry->mem_id, +++ __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), +++ __entry->allocator, +++ __entry->ifindex +++ ) +++); +++ +++TRACE_EVENT(mem_return_failed, +++ +++ TP_PROTO(const struct xdp_mem_info *mem, +++ const struct page *page), +++ +++ TP_ARGS(mem, page), +++ +++ TP_STRUCT__entry( +++ __field(const struct page *, page) +++ __field(u32, mem_id) +++ __field(u32, mem_type) +++ ), +++ +++ TP_fast_assign( +++ __entry->page = page; +++ __entry->mem_id = mem->id; +++ __entry->mem_type = mem->type; +++ ), +++ +++ TP_printk("mem_id=%d mem_type=%s page=%p", +++ __entry->mem_id, +++ __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), +++ __entry->page +++ ) +++); +++ +++#endif /* _TRACE_XDP_H */ +++ +++#include ++--- /dev/null +++++ b/include/net/xdp_priv.h ++@@ -0,0 +1,20 @@ +++/* SPDX-License-Identifier: GPL-2.0 */ +++#ifndef __LINUX_NET_XDP_PRIV_H__ +++#define __LINUX_NET_XDP_PRIV_H__ +++ +++#include +++#include +++ +++/* Private to net/core/xdp.c, but used by trace/events/xdp.h */ +++struct xdp_mem_allocator { +++ struct xdp_mem_info mem; +++ union { +++ void *allocator; +++ struct page_pool *page_pool; +++ struct zero_copy_allocator *zc_alloc; +++ }; +++ struct rhash_head node; +++ struct rcu_head rcu; +++}; +++ +++#endif /* __LINUX_NET_XDP_PRIV_H__ */ ++--- /dev/null +++++ b/include/net/xdp.h ++@@ -0,0 +1,184 @@ +++/* SPDX-License-Identifier: GPL-2.0-only */ +++/* include/net/xdp.h +++ * +++ * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc. +++ */ +++#ifndef __LINUX_NET_XDP_H__ +++#define __LINUX_NET_XDP_H__ +++ +++/** +++ * DOC: XDP RX-queue information +++ * +++ * The XDP RX-queue info (xdp_rxq_info) is associated with the driver +++ * level RX-ring queues. It is information that is specific to how +++ * the driver have configured a given RX-ring queue. +++ * +++ * Each xdp_buff frame received in the driver carry a (pointer) +++ * reference to this xdp_rxq_info structure. This provides the XDP +++ * data-path read-access to RX-info for both kernel and bpf-side +++ * (limited subset). 
+++ * +++ * For now, direct access is only safe while running in NAPI/softirq +++ * context. Contents is read-mostly and must not be updated during +++ * driver NAPI/softirq poll. +++ * +++ * The driver usage API is a register and unregister API. +++ * +++ * The struct is not directly tied to the XDP prog. A new XDP prog +++ * can be attached as long as it doesn't change the underlying +++ * RX-ring. If the RX-ring does change significantly, the NIC driver +++ * naturally need to stop the RX-ring before purging and reallocating +++ * memory. In that process the driver MUST call unregistor (which +++ * also apply for driver shutdown and unload). The register API is +++ * also mandatory during RX-ring setup. +++ */ +++ +++enum xdp_mem_type { +++ MEM_TYPE_PAGE_SHARED = 0, /* Split-page refcnt based model */ +++ MEM_TYPE_PAGE_ORDER0, /* Orig XDP full page model */ +++ MEM_TYPE_PAGE_POOL, +++ MEM_TYPE_ZERO_COPY, +++ MEM_TYPE_MAX, +++}; +++ +++/* XDP flags for ndo_xdp_xmit */ +++#define XDP_XMIT_FLUSH (1U << 0) /* doorbell signal consumer */ +++#define XDP_XMIT_FLAGS_MASK XDP_XMIT_FLUSH +++ +++struct xdp_mem_info { +++ u32 type; /* enum xdp_mem_type, but known size type */ +++ u32 id; +++}; +++ +++struct page_pool; +++ +++struct zero_copy_allocator { +++ void (*free)(struct zero_copy_allocator *zca, unsigned long handle); +++}; +++ +++struct xdp_rxq_info { +++ struct net_device *dev; +++ u32 queue_index; +++ u32 reg_state; +++ struct xdp_mem_info mem; +++} ____cacheline_aligned; /* perf critical, avoid false-sharing */ +++ +++struct xdp_buff { +++ void *data; +++ void *data_end; +++ void *data_meta; +++ void *data_hard_start; +++ unsigned long handle; +++ struct xdp_rxq_info *rxq; +++}; +++ +++struct xdp_frame { +++ void *data; +++ u16 len; +++ u16 headroom; +++ u16 metasize; +++ /* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time, +++ * while mem info is valid on remote CPU. +++ */ +++ struct xdp_mem_info mem; +++ struct net_device *dev_rx; /* used by cpumap */ +++}; +++ +++/* Clear kernel pointers in xdp_frame */ +++static inline void xdp_scrub_frame(struct xdp_frame *frame) +++{ +++ frame->data = NULL; +++ frame->dev_rx = NULL; +++} +++ +++struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp); +++ +++/* Convert xdp_buff to xdp_frame */ +++static inline +++struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp) +++{ +++ struct xdp_frame *xdp_frame; +++ int metasize; +++ int headroom; +++ +++ if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) +++ return xdp_convert_zc_to_xdp_frame(xdp); +++ +++ /* Assure headroom is available for storing info */ +++ headroom = xdp->data - xdp->data_hard_start; +++ metasize = xdp->data - xdp->data_meta; +++ metasize = metasize > 0 ? metasize : 0; +++ if (unlikely((headroom - metasize) < sizeof(*xdp_frame))) +++ return NULL; +++ +++ /* Store info in top of packet */ +++ xdp_frame = xdp->data_hard_start; +++ +++ xdp_frame->data = xdp->data; +++ xdp_frame->len = xdp->data_end - xdp->data; +++ xdp_frame->headroom = headroom - sizeof(*xdp_frame); +++ xdp_frame->metasize = metasize; +++ +++ /* rxq only valid until napi_schedule ends, convert to xdp_mem_info */ +++ xdp_frame->mem = xdp->rxq->mem; +++ +++ return xdp_frame; +++} +++ +++void xdp_return_frame(struct xdp_frame *xdpf); +++void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); +++void xdp_return_buff(struct xdp_buff *xdp); +++ +++/* When sending xdp_frame into the network stack, then there is no +++ * return point callback, which is needed to release e.g. 
DMA-mapping +++ * resources with page_pool. Thus, have explicit function to release +++ * frame resources. +++ */ +++void __xdp_release_frame(void *data, struct xdp_mem_info *mem); +++static inline void xdp_release_frame(struct xdp_frame *xdpf) +++{ +++ struct xdp_mem_info *mem = &xdpf->mem; +++ +++ /* Curr only page_pool needs this */ +++ if (mem->type == MEM_TYPE_PAGE_POOL) +++ __xdp_release_frame(xdpf->data, mem); +++} +++ +++int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, +++ struct net_device *dev, u32 queue_index); +++void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq); +++void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq); +++bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq); +++int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, +++ enum xdp_mem_type type, void *allocator); +++void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq); +++ +++/* Drivers not supporting XDP metadata can use this helper, which +++ * rejects any room expansion for metadata as a result. +++ */ +++static __always_inline void +++xdp_set_data_meta_invalid(struct xdp_buff *xdp) +++{ +++ xdp->data_meta = xdp->data + 1; +++} +++ +++static __always_inline bool +++xdp_data_meta_unsupported(const struct xdp_buff *xdp) +++{ +++ return unlikely(xdp->data_meta > xdp->data); +++} +++ +++struct xdp_attachment_info { +++ struct bpf_prog *prog; +++ u32 flags; +++}; +++ +++struct netdev_bpf; +++int xdp_attachment_query(struct xdp_attachment_info *info, +++ struct netdev_bpf *bpf); +++bool xdp_attachment_flags_ok(struct xdp_attachment_info *info, +++ struct netdev_bpf *bpf); +++void xdp_attachment_setup(struct xdp_attachment_info *info, +++ struct netdev_bpf *bpf); +++ +++#endif /* __LINUX_NET_XDP_H__ */ ++--- a/include/linux/atomic.h +++++ b/include/linux/atomic.h ++@@ -437,6 +437,8 @@ static inline int atomic_add_unless(atom ++ return __atomic_add_unless(v, a, u) != u; ++ } ++ +++#define atomic_fetch_add_unless __atomic_add_unless +++ ++ /** ++ * atomic_inc_not_zero - increment unless the number is zero ++ * @v: pointer of type atomic_t ++--- a/include/linux/kernel.h +++++ b/include/linux/kernel.h ++@@ -45,6 +45,13 @@ ++ ++ #define STACK_MAGIC 0xdeadbeef ++ +++#define u64_to_user_ptr(x) ( \ +++ { \ +++ typecheck(u64, (x)); \ +++ (void __user *)(uintptr_t)(x); \ +++ } \ +++) +++ ++ #define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) ++ ++ #define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) ++--- /dev/null +++++ b/include/linux/tnum.h ++@@ -0,0 +1,89 @@ +++/* tnum: tracked (or tristate) numbers +++ * +++ * A tnum tracks knowledge about the bits of a value. Each bit can be either +++ * known (0 or 1), or unknown (x). Arithmetic operations on tnums will +++ * propagate the unknown bits such that the tnum result represents all the +++ * possible results for possible values of the operands. +++ */ +++ +++#ifndef _LINUX_TNUM_H +++#define _LINUX_TNUM_H +++ +++#include +++ +++struct tnum { +++ u64 value; +++ u64 mask; +++}; +++ +++/* Constructors */ +++/* Represent a known constant as a tnum. 
*/ +++struct tnum tnum_const(u64 value); +++/* A completely unknown value */ +++extern const struct tnum tnum_unknown; +++/* A value that's unknown except that @min <= value <= @max */ +++struct tnum tnum_range(u64 min, u64 max); +++ +++/* Arithmetic and logical ops */ +++/* Shift a tnum left (by a fixed shift) */ +++struct tnum tnum_lshift(struct tnum a, u8 shift); +++/* Shift (rsh) a tnum right (by a fixed shift) */ +++struct tnum tnum_rshift(struct tnum a, u8 shift); +++/* Shift (arsh) a tnum right (by a fixed min_shift) */ +++struct tnum tnum_arshift(struct tnum a, u8 min_shift, u8 insn_bitness); +++/* Add two tnums, return @a + @b */ +++struct tnum tnum_add(struct tnum a, struct tnum b); +++/* Subtract two tnums, return @a - @b */ +++struct tnum tnum_sub(struct tnum a, struct tnum b); +++/* Bitwise-AND, return @a & @b */ +++struct tnum tnum_and(struct tnum a, struct tnum b); +++/* Bitwise-OR, return @a | @b */ +++struct tnum tnum_or(struct tnum a, struct tnum b); +++/* Bitwise-XOR, return @a ^ @b */ +++struct tnum tnum_xor(struct tnum a, struct tnum b); +++/* Multiply two tnums, return @a * @b */ +++struct tnum tnum_mul(struct tnum a, struct tnum b); +++ +++/* Return a tnum representing numbers satisfying both @a and @b */ +++struct tnum tnum_intersect(struct tnum a, struct tnum b); +++ +++/* Return @a with all but the lowest @size bytes cleared */ +++struct tnum tnum_cast(struct tnum a, u8 size); +++ +++/* Returns true if @a is a known constant */ +++static inline bool tnum_is_const(struct tnum a) +++{ +++ return !a.mask; +++} +++ +++/* Returns true if @a == tnum_const(@b) */ +++static inline bool tnum_equals_const(struct tnum a, u64 b) +++{ +++ return tnum_is_const(a) && a.value == b; +++} +++ +++/* Returns true if @a is completely unknown */ +++static inline bool tnum_is_unknown(struct tnum a) +++{ +++ return !~a.mask; +++} +++ +++/* Returns true if @a is known to be a multiple of @size. +++ * @size must be a power of two. +++ */ +++bool tnum_is_aligned(struct tnum a, u64 size); +++ +++/* Returns true if @b represents a subset of @a. */ +++bool tnum_in(struct tnum a, struct tnum b); +++ +++/* Formatting functions. These have snprintf-like semantics: they will write +++ * up to @size bytes (including the terminating NUL byte), and return the number +++ * of bytes (excluding the terminating NUL) which would have been written had +++ * sufficient space been available. (Thus tnum_sbin always returns 64.) +++ */ +++/* Format a tnum as a pair of hex numbers (value; mask) */ +++int tnum_strn(char *str, size_t size, struct tnum a); +++/* Format a tnum as tristate binary expansion */ +++int tnum_sbin(char *str, size_t size, struct tnum a); +++ +++#endif /* _LINUX_TNUM_H */ ++--- a/include/linux/bitmap.h +++++ b/include/linux/bitmap.h ++@@ -326,6 +326,24 @@ static inline int bitmap_parse(const cha ++ return __bitmap_parse(buf, buflen, 0, maskp, nmaskbits); ++ } ++ +++/** +++ * bitmap_from_u64 - Check and swap words within u64. +++ * @mask: source bitmap +++ * @dst: destination bitmap +++ * +++ * In 32-bit Big Endian kernel, when using ``(u32 *)(&val)[*]`` +++ * to read u64 mask, we will get the wrong word. +++ * That is ``(u32 *)(&val)[0]`` gets the upper 32 bits, +++ * but we expect the lower 32-bits of u64. 
+++ */ +++static inline void bitmap_from_u64(unsigned long *dst, u64 mask) +++{ +++ dst[0] = mask & ULONG_MAX; +++ +++ if (sizeof(mask) > sizeof(unsigned long)) +++ dst[1] = mask >> 32; +++} +++ ++ #endif /* __ASSEMBLY__ */ ++ ++ #endif /* __LINUX_BITMAP_H */ ++--- /dev/null +++++ b/include/linux/overflow.h ++@@ -0,0 +1,320 @@ +++/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +++#ifndef __LINUX_OVERFLOW_H +++#define __LINUX_OVERFLOW_H +++ +++#include +++#include +++ +++/* +++ * In the fallback code below, we need to compute the minimum and +++ * maximum values representable in a given type. These macros may also +++ * be useful elsewhere, so we provide them outside the +++ * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block. +++ * +++ * It would seem more obvious to do something like +++ * +++ * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0) +++ * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0) +++ * +++ * Unfortunately, the middle expressions, strictly speaking, have +++ * undefined behaviour, and at least some versions of gcc warn about +++ * the type_max expression (but not if -fsanitize=undefined is in +++ * effect; in that case, the warning is deferred to runtime...). +++ * +++ * The slightly excessive casting in type_min is to make sure the +++ * macros also produce sensible values for the exotic type _Bool. [The +++ * overflow checkers only almost work for _Bool, but that's +++ * a-feature-not-a-bug, since people shouldn't be doing arithmetic on +++ * _Bools. Besides, the gcc builtins don't allow _Bool* as third +++ * argument.] +++ * +++ * Idea stolen from +++ * https://mail-index.netbsd.org/tech-misc/2007/02/05/0000.html - +++ * credit to Christian Biere. +++ */ +++#define is_signed_type(type) (((type)(-1)) < (type)1) +++#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type))) +++#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) +++#define type_min(T) ((T)((T)-type_max(T)-(T)1)) +++ +++/* +++ * Avoids triggering -Wtype-limits compilation warning, +++ * while using unsigned data types to check a < 0. +++ */ +++#define is_non_negative(a) ((a) > 0 || (a) == 0) +++#define is_negative(a) (!(is_non_negative(a))) +++ +++#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW +++/* +++ * For simplicity and code hygiene, the fallback code below insists on +++ * a, b and *d having the same type (similar to the min() and max() +++ * macros), whereas gcc's type-generic overflow checkers accept +++ * different types. Hence we don't just make check_add_overflow an +++ * alias for __builtin_add_overflow, but add type checks similar to +++ * below. +++ */ +++#define check_add_overflow(a, b, d) ({ \ +++ typeof(a) __a = (a); \ +++ typeof(b) __b = (b); \ +++ typeof(d) __d = (d); \ +++ (void) (&__a == &__b); \ +++ (void) (&__a == __d); \ +++ __builtin_add_overflow(__a, __b, __d); \ +++}) +++ +++#define check_sub_overflow(a, b, d) ({ \ +++ typeof(a) __a = (a); \ +++ typeof(b) __b = (b); \ +++ typeof(d) __d = (d); \ +++ (void) (&__a == &__b); \ +++ (void) (&__a == __d); \ +++ __builtin_sub_overflow(__a, __b, __d); \ +++}) +++ +++#define check_mul_overflow(a, b, d) ({ \ +++ typeof(a) __a = (a); \ +++ typeof(b) __b = (b); \ +++ typeof(d) __d = (d); \ +++ (void) (&__a == &__b); \ +++ (void) (&__a == __d); \ +++ __builtin_mul_overflow(__a, __b, __d); \ +++}) +++ +++#else +++ +++ +++/* Checking for unsigned overflow is relatively easy without causing UB. 
*/ +++#define __unsigned_add_overflow(a, b, d) ({ \ +++ typeof(a) __a = (a); \ +++ typeof(b) __b = (b); \ +++ typeof(d) __d = (d); \ +++ (void) (&__a == &__b); \ +++ (void) (&__a == __d); \ +++ *__d = __a + __b; \ +++ *__d < __a; \ +++}) +++#define __unsigned_sub_overflow(a, b, d) ({ \ +++ typeof(a) __a = (a); \ +++ typeof(b) __b = (b); \ +++ typeof(d) __d = (d); \ +++ (void) (&__a == &__b); \ +++ (void) (&__a == __d); \ +++ *__d = __a - __b; \ +++ __a < __b; \ +++}) +++/* +++ * If one of a or b is a compile-time constant, this avoids a division. +++ */ +++#define __unsigned_mul_overflow(a, b, d) ({ \ +++ typeof(a) __a = (a); \ +++ typeof(b) __b = (b); \ +++ typeof(d) __d = (d); \ +++ (void) (&__a == &__b); \ +++ (void) (&__a == __d); \ +++ *__d = __a * __b; \ +++ __builtin_constant_p(__b) ? \ +++ __b > 0 && __a > type_max(typeof(__a)) / __b : \ +++ __a > 0 && __b > type_max(typeof(__b)) / __a; \ +++}) +++ +++/* +++ * For signed types, detecting overflow is much harder, especially if +++ * we want to avoid UB. But the interface of these macros is such that +++ * we must provide a result in *d, and in fact we must produce the +++ * result promised by gcc's builtins, which is simply the possibly +++ * wrapped-around value. Fortunately, we can just formally do the +++ * operations in the widest relevant unsigned type (u64) and then +++ * truncate the result - gcc is smart enough to generate the same code +++ * with and without the (u64) casts. +++ */ +++ +++/* +++ * Adding two signed integers can overflow only if they have the same +++ * sign, and overflow has happened iff the result has the opposite +++ * sign. +++ */ +++#define __signed_add_overflow(a, b, d) ({ \ +++ typeof(a) __a = (a); \ +++ typeof(b) __b = (b); \ +++ typeof(d) __d = (d); \ +++ (void) (&__a == &__b); \ +++ (void) (&__a == __d); \ +++ *__d = (u64)__a + (u64)__b; \ +++ (((~(__a ^ __b)) & (*__d ^ __a)) \ +++ & type_min(typeof(__a))) != 0; \ +++}) +++ +++/* +++ * Subtraction is similar, except that overflow can now happen only +++ * when the signs are opposite. In this case, overflow has happened if +++ * the result has the opposite sign of a. +++ */ +++#define __signed_sub_overflow(a, b, d) ({ \ +++ typeof(a) __a = (a); \ +++ typeof(b) __b = (b); \ +++ typeof(d) __d = (d); \ +++ (void) (&__a == &__b); \ +++ (void) (&__a == __d); \ +++ *__d = (u64)__a - (u64)__b; \ +++ ((((__a ^ __b)) & (*__d ^ __a)) \ +++ & type_min(typeof(__a))) != 0; \ +++}) +++ +++/* +++ * Signed multiplication is rather hard. gcc always follows C99, so +++ * division is truncated towards 0. This means that we can write the +++ * overflow check like this: +++ * +++ * (a > 0 && (b > MAX/a || b < MIN/a)) || +++ * (a < -1 && (b > MIN/a || b < MAX/a) || +++ * (a == -1 && b == MIN) +++ * +++ * The redundant casts of -1 are to silence an annoying -Wtype-limits +++ * (included in -Wextra) warning: When the type is u8 or u16, the +++ * __b_c_e in check_mul_overflow obviously selects +++ * __unsigned_mul_overflow, but unfortunately gcc still parses this +++ * code and warns about the limited range of __b. 
+++ */ +++ +++#define __signed_mul_overflow(a, b, d) ({ \ +++ typeof(a) __a = (a); \ +++ typeof(b) __b = (b); \ +++ typeof(d) __d = (d); \ +++ typeof(a) __tmax = type_max(typeof(a)); \ +++ typeof(a) __tmin = type_min(typeof(a)); \ +++ (void) (&__a == &__b); \ +++ (void) (&__a == __d); \ +++ *__d = (u64)__a * (u64)__b; \ +++ (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \ +++ (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \ +++ (__b == (typeof(__b))-1 && __a == __tmin); \ +++}) +++ +++ +++#define check_add_overflow(a, b, d) \ +++ __builtin_choose_expr(is_signed_type(typeof(a)), \ +++ __signed_add_overflow(a, b, d), \ +++ __unsigned_add_overflow(a, b, d)) +++ +++#define check_sub_overflow(a, b, d) \ +++ __builtin_choose_expr(is_signed_type(typeof(a)), \ +++ __signed_sub_overflow(a, b, d), \ +++ __unsigned_sub_overflow(a, b, d)) +++ +++#define check_mul_overflow(a, b, d) \ +++ __builtin_choose_expr(is_signed_type(typeof(a)), \ +++ __signed_mul_overflow(a, b, d), \ +++ __unsigned_mul_overflow(a, b, d)) +++ +++ +++#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */ +++ +++/** check_shl_overflow() - Calculate a left-shifted value and check overflow +++ * +++ * @a: Value to be shifted +++ * @s: How many bits left to shift +++ * @d: Pointer to where to store the result +++ * +++ * Computes *@d = (@a << @s) +++ * +++ * Returns true if '*d' cannot hold the result or when 'a << s' doesn't +++ * make sense. Example conditions: +++ * - 'a << s' causes bits to be lost when stored in *d. +++ * - 's' is garbage (e.g. negative) or so large that the result of +++ * 'a << s' is guaranteed to be 0. +++ * - 'a' is negative. +++ * - 'a << s' sets the sign bit, if any, in '*d'. +++ * +++ * '*d' will hold the results of the attempted shift, but is not +++ * considered "safe for use" if false is returned. +++ */ +++#define check_shl_overflow(a, s, d) ({ \ +++ typeof(a) _a = a; \ +++ typeof(s) _s = s; \ +++ typeof(d) _d = d; \ +++ u64 _a_full = _a; \ +++ unsigned int _to_shift = \ +++ is_non_negative(_s) && _s < 8 * sizeof(*d) ? _s : 0; \ +++ *_d = (_a_full << _to_shift); \ +++ (_to_shift != _s || is_negative(*_d) || is_negative(_a) || \ +++ (*_d >> _to_shift) != _a); \ +++}) +++ +++/** +++ * array_size() - Calculate size of 2-dimensional array. +++ * +++ * @a: dimension one +++ * @b: dimension two +++ * +++ * Calculates size of 2-dimensional array: @a * @b. +++ * +++ * Returns: number of bytes needed to represent the array or SIZE_MAX on +++ * overflow. +++ */ +++static inline __must_check size_t array_size(size_t a, size_t b) +++{ +++ size_t bytes; +++ +++ if (check_mul_overflow(a, b, &bytes)) +++ return SIZE_MAX; +++ +++ return bytes; +++} +++ +++/** +++ * array3_size() - Calculate size of 3-dimensional array. +++ * +++ * @a: dimension one +++ * @b: dimension two +++ * @c: dimension three +++ * +++ * Calculates size of 3-dimensional array: @a * @b * @c. +++ * +++ * Returns: number of bytes needed to represent the array or SIZE_MAX on +++ * overflow. +++ */ +++static inline __must_check size_t array3_size(size_t a, size_t b, size_t c) +++{ +++ size_t bytes; +++ +++ if (check_mul_overflow(a, b, &bytes)) +++ return SIZE_MAX; +++ if (check_mul_overflow(bytes, c, &bytes)) +++ return SIZE_MAX; +++ +++ return bytes; +++} +++ +++/* +++ * Compute a*b+c, returning SIZE_MAX on overflow. Internal helper for +++ * struct_size() below. 
+++ */ +++static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c) +++{ +++ size_t bytes; +++ +++ if (check_mul_overflow(a, b, &bytes)) +++ return SIZE_MAX; +++ if (check_add_overflow(bytes, c, &bytes)) +++ return SIZE_MAX; +++ +++ return bytes; +++} +++ +++/** +++ * struct_size() - Calculate size of structure with trailing array. +++ * @p: Pointer to the structure. +++ * @member: Name of the array member. +++ * @n: Number of elements in the array. +++ * +++ * Calculates size of memory needed for structure @p followed by an +++ * array of @n @member elements. +++ * +++ * Return: number of bytes needed or SIZE_MAX on overflow. +++ */ +++#define struct_size(p, member, n) \ +++ __ab_c_size(n, \ +++ sizeof(*(p)->member) + __must_be_array((p)->member),\ +++ sizeof(*(p))) +++ +++#endif /* __LINUX_OVERFLOW_H */ ++--- a/net/core/filter.c +++++ b/net/core/filter.c ++@@ -1,3 +1,4 @@ +++// SPDX-License-Identifier: GPL-2.0-or-later ++ /* ++ * Linux Socket Filter - Kernel level socket filtering ++ * ++@@ -12,11 +13,6 @@ ++ * Alexei Starovoitov ++ * Daniel Borkmann ++ * ++- * This program is free software; you can redistribute it and/or ++- * modify it under the terms of the GNU General Public License ++- * as published by the Free Software Foundation; either version ++- * 2 of the License, or (at your option) any later version. ++- * ++ * Andi Kleen - Fix a few bad bugs and races. ++ * Kris Katterjohn - Added many additional checks in bpf_check_classic() ++ */ ++@@ -26,11 +22,14 @@ ++ #include ++ #include ++ #include +++#include ++ #include ++ #include ++ #include ++ #include +++#include ++ #include +++#include ++ #include ++ #include ++ #include ++@@ -39,17 +38,32 @@ ++ #include ++ #include ++ #include ++-#include +++#include ++ #include +++#include ++ #include ++ #include ++ #include ++ #include ++ #include ++ #include ++-#include ++ #include ++ #include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include ++ ++ /** ++ * sk_filter_trim_cap - run a packet through a socket filter ++@@ -84,7 +98,12 @@ int sk_filter_trim_cap(struct sock *sk, ++ rcu_read_lock(); ++ filter = rcu_dereference(sk->sk_filter); ++ if (filter) { ++- unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb); +++ struct sock *save_sk = skb->sk; +++ unsigned int pkt_len; +++ +++ skb->sk = sk; +++ pkt_len = bpf_prog_run_save_cb(filter->prog, skb); +++ skb->sk = save_sk; ++ err = pkt_len ? 
pskb_trim(skb, max(cap, pkt_len)) : -EPERM; ++ } ++ rcu_read_unlock(); ++@@ -93,14 +112,13 @@ int sk_filter_trim_cap(struct sock *sk, ++ } ++ EXPORT_SYMBOL(sk_filter_trim_cap); ++ ++-static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) +++BPF_CALL_1(bpf_skb_get_pay_offset, struct sk_buff *, skb) ++ { ++- return skb_get_poff((struct sk_buff *)(unsigned long) ctx); +++ return skb_get_poff(skb); ++ } ++ ++-static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) +++BPF_CALL_3(bpf_skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x) ++ { ++- struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx; ++ struct nlattr *nla; ++ ++ if (skb_is_nonlinear(skb)) ++@@ -119,9 +137,8 @@ static u64 __skb_get_nlattr(u64 ctx, u64 ++ return 0; ++ } ++ ++-static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) +++BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x) ++ { ++- struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx; ++ struct nlattr *nla; ++ ++ if (skb_is_nonlinear(skb)) ++@@ -144,11 +161,98 @@ static u64 __skb_get_nlattr_nest(u64 ctx ++ return 0; ++ } ++ ++-static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) +++BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *, +++ data, int, headlen, int, offset) +++{ +++ u8 tmp, *ptr; +++ const int len = sizeof(tmp); +++ +++ if (offset >= 0) { +++ if (headlen - offset >= len) +++ return *(u8 *)(data + offset); +++ if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) +++ return tmp; +++ } else { +++ ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); +++ if (likely(ptr)) +++ return *(u8 *)ptr; +++ } +++ +++ return -EFAULT; +++} +++ +++BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb, +++ int, offset) +++{ +++ return ____bpf_skb_load_helper_8(skb, skb->data, skb->len - skb->data_len, +++ offset); +++} +++ +++BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *, +++ data, int, headlen, int, offset) +++{ +++ u16 tmp, *ptr; +++ const int len = sizeof(tmp); +++ +++ if (offset >= 0) { +++ if (headlen - offset >= len) +++ return get_unaligned_be16(data + offset); +++ if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) +++ return be16_to_cpu(tmp); +++ } else { +++ ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); +++ if (likely(ptr)) +++ return get_unaligned_be16(ptr); +++ } +++ +++ return -EFAULT; +++} +++ +++BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb, +++ int, offset) +++{ +++ return ____bpf_skb_load_helper_16(skb, skb->data, skb->len - skb->data_len, +++ offset); +++} +++ +++BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *, +++ data, int, headlen, int, offset) +++{ +++ u32 tmp, *ptr; +++ const int len = sizeof(tmp); +++ +++ if (likely(offset >= 0)) { +++ if (headlen - offset >= len) +++ return get_unaligned_be32(data + offset); +++ if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) +++ return be32_to_cpu(tmp); +++ } else { +++ ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); +++ if (likely(ptr)) +++ return get_unaligned_be32(ptr); +++ } +++ +++ return -EFAULT; +++} +++ +++BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb, +++ int, offset) +++{ +++ return ____bpf_skb_load_helper_32(skb, skb->data, skb->len - skb->data_len, +++ offset); +++} +++ +++BPF_CALL_0(bpf_get_raw_cpu_id) ++ { ++ return raw_smp_processor_id(); ++ } ++ +++static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto 
= { +++ .func = bpf_get_raw_cpu_id, +++ .gpl_only = false, +++ .ret_type = RET_INTEGER, +++}; +++ ++ static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg, ++ struct bpf_insn *insn_buf) ++ { ++@@ -178,22 +282,18 @@ static u32 convert_skb_access(int skb_fi ++ break; ++ ++ case SKF_AD_VLAN_TAG: ++- case SKF_AD_VLAN_TAG_PRESENT: ++ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); ++- BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); ++ ++ /* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */ ++ *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, ++ offsetof(struct sk_buff, vlan_tci)); ++- if (skb_field == SKF_AD_VLAN_TAG) { ++- *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, ++- ~VLAN_TAG_PRESENT); ++- } else { ++- /* dst_reg >>= 12 */ ++- *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 12); ++- /* dst_reg &= 1 */ +++ break; +++ case SKF_AD_VLAN_TAG_PRESENT: +++ *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_VLAN_PRESENT_OFFSET()); +++ if (PKT_VLAN_PRESENT_BIT) +++ *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, PKT_VLAN_PRESENT_BIT); +++ if (PKT_VLAN_PRESENT_BIT < 7) ++ *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1); ++- } ++ break; ++ } ++ ++@@ -226,9 +326,8 @@ static bool convert_bpf_extensions(struc ++ case SKF_AD_OFF + SKF_AD_HATYPE: ++ BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); ++ BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); ++- BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0); ++ ++- *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)), +++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), ++ BPF_REG_TMP, BPF_REG_CTX, ++ offsetof(struct sk_buff, dev)); ++ /* if (tmp != 0) goto pc + 1 */ ++@@ -295,16 +394,16 @@ static bool convert_bpf_extensions(struc ++ /* Emit call(arg1=CTX, arg2=A, arg3=X) */ ++ switch (fp->k) { ++ case SKF_AD_OFF + SKF_AD_PAY_OFFSET: ++- *insn = BPF_EMIT_CALL(__skb_get_pay_offset); +++ *insn = BPF_EMIT_CALL(bpf_skb_get_pay_offset); ++ break; ++ case SKF_AD_OFF + SKF_AD_NLATTR: ++- *insn = BPF_EMIT_CALL(__skb_get_nlattr); +++ *insn = BPF_EMIT_CALL(bpf_skb_get_nlattr); ++ break; ++ case SKF_AD_OFF + SKF_AD_NLATTR_NEST: ++- *insn = BPF_EMIT_CALL(__skb_get_nlattr_nest); +++ *insn = BPF_EMIT_CALL(bpf_skb_get_nlattr_nest); ++ break; ++ case SKF_AD_OFF + SKF_AD_CPU: ++- *insn = BPF_EMIT_CALL(__get_raw_cpu_id); +++ *insn = BPF_EMIT_CALL(bpf_get_raw_cpu_id); ++ break; ++ case SKF_AD_OFF + SKF_AD_RANDOM: ++ *insn = BPF_EMIT_CALL(bpf_user_rnd_u32); ++@@ -331,35 +430,101 @@ static bool convert_bpf_extensions(struc ++ return true; ++ } ++ +++static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp) +++{ +++ const bool unaligned_ok = IS_BUILTIN(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS); +++ int size = bpf_size_to_bytes(BPF_SIZE(fp->code)); +++ bool endian = BPF_SIZE(fp->code) == BPF_H || +++ BPF_SIZE(fp->code) == BPF_W; +++ bool indirect = BPF_MODE(fp->code) == BPF_IND; +++ const int ip_align = NET_IP_ALIGN; +++ struct bpf_insn *insn = *insnp; +++ int offset = fp->k; +++ +++ if (!indirect && +++ ((unaligned_ok && offset >= 0) || +++ (!unaligned_ok && offset >= 0 && +++ offset + ip_align >= 0 && +++ offset + ip_align % size == 0))) { +++ bool ldx_off_ok = offset <= S16_MAX; +++ +++ *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H); +++ if (offset) +++ *insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset); +++ *insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP, +++ size, 2 + endian + (!ldx_off_ok * 2)); +++ if (ldx_off_ok) { +++ *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A, +++ 
BPF_REG_D, offset); +++ } else { +++ *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_D); +++ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_TMP, offset); +++ *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A, +++ BPF_REG_TMP, 0); +++ } +++ if (endian) +++ *insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size * 8); +++ *insn++ = BPF_JMP_A(8); +++ } +++ +++ *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX); +++ *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_D); +++ *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_H); +++ if (!indirect) { +++ *insn++ = BPF_MOV64_IMM(BPF_REG_ARG4, offset); +++ } else { +++ *insn++ = BPF_MOV64_REG(BPF_REG_ARG4, BPF_REG_X); +++ if (fp->k) +++ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG4, offset); +++ } +++ +++ switch (BPF_SIZE(fp->code)) { +++ case BPF_B: +++ *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8); +++ break; +++ case BPF_H: +++ *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16); +++ break; +++ case BPF_W: +++ *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32); +++ break; +++ default: +++ return false; +++ } +++ +++ *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_A, 0, 2); +++ *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A); +++ *insn = BPF_EXIT_INSN(); +++ +++ *insnp = insn; +++ return true; +++} +++ ++ /** ++ * bpf_convert_filter - convert filter program ++ * @prog: the user passed filter program ++ * @len: the length of the user passed filter program ++- * @new_prog: buffer where converted program will be stored +++ * @new_prog: allocated 'struct bpf_prog' or NULL ++ * @new_len: pointer to store length of converted program +++ * @seen_ld_abs: bool whether we've seen ld_abs/ind ++ * ++- * Remap 'sock_filter' style BPF instruction set to 'sock_filter_ext' style. +++ * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn' +++ * style extended BPF (eBPF). ++ * Conversion workflow: ++ * ++ * 1) First pass for calculating the new program length: ++- * bpf_convert_filter(old_prog, old_len, NULL, &new_len) +++ * bpf_convert_filter(old_prog, old_len, NULL, &new_len, &seen_ld_abs) ++ * ++ * 2) 2nd pass to remap in two passes: 1st pass finds new ++ * jump offsets, 2nd pass remapping: ++- * new_prog = kmalloc(sizeof(struct bpf_insn) * new_len); ++- * bpf_convert_filter(old_prog, old_len, new_prog, &new_len); ++- * ++- * User BPF's register A is mapped to our BPF register 6, user BPF ++- * register X is mapped to BPF register 7; frame pointer is always ++- * register 10; Context 'void *ctx' is stored in register 1, that is, ++- * for socket filters: ctx == 'struct sk_buff *', for seccomp: ++- * ctx == 'struct seccomp_data *'. 
+++ * bpf_convert_filter(old_prog, old_len, new_prog, &new_len, &seen_ld_abs) ++ */ ++ static int bpf_convert_filter(struct sock_filter *prog, int len, ++- struct bpf_insn *new_prog, int *new_len) +++ struct bpf_prog *new_prog, int *new_len, +++ bool *seen_ld_abs) ++ { ++- int new_flen = 0, pass = 0, target, i; ++- struct bpf_insn *new_insn; +++ int new_flen = 0, pass = 0, target, i, stack_off; +++ struct bpf_insn *new_insn, *first_insn = NULL; ++ struct sock_filter *fp; ++ int *addrs = NULL; ++ u8 bpf_src; ++@@ -371,6 +536,7 @@ static int bpf_convert_filter(struct soc ++ return -EINVAL; ++ ++ if (new_prog) { +++ first_insn = new_prog->insnsi; ++ addrs = kcalloc(len, sizeof(*addrs), ++ GFP_KERNEL | __GFP_NOWARN); ++ if (!addrs) ++@@ -378,19 +544,47 @@ static int bpf_convert_filter(struct soc ++ } ++ ++ do_pass: ++- new_insn = new_prog; +++ new_insn = first_insn; ++ fp = prog; ++ ++- if (new_insn) ++- *new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); ++- new_insn++; +++ /* Classic BPF related prologue emission. */ +++ if (new_prog) { +++ /* Classic BPF expects A and X to be reset first. These need +++ * to be guaranteed to be the first two instructions. +++ */ +++ *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A); +++ *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X); +++ +++ /* All programs must keep CTX in callee saved BPF_REG_CTX. +++ * In eBPF case it's done by the compiler, here we need to +++ * do this ourself. Initial CTX is present in BPF_REG_ARG1. +++ */ +++ *new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1); +++ if (*seen_ld_abs) { +++ /* For packet access in classic BPF, cache skb->data +++ * in callee-saved BPF R8 and skb->len - skb->data_len +++ * (headlen) in BPF R9. Since classic BPF is read-only +++ * on CTX, we only need to cache it once. +++ */ +++ *new_insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), +++ BPF_REG_D, BPF_REG_CTX, +++ offsetof(struct sk_buff, data)); +++ *new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_H, BPF_REG_CTX, +++ offsetof(struct sk_buff, len)); +++ *new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_TMP, BPF_REG_CTX, +++ offsetof(struct sk_buff, data_len)); +++ *new_insn++ = BPF_ALU32_REG(BPF_SUB, BPF_REG_H, BPF_REG_TMP); +++ } +++ } else { +++ new_insn += 3; +++ } ++ ++ for (i = 0; i < len; fp++, i++) { ++- struct bpf_insn tmp_insns[6] = { }; +++ struct bpf_insn tmp_insns[32] = { }; ++ struct bpf_insn *insn = tmp_insns; ++ ++ if (addrs) ++- addrs[i] = new_insn - new_prog; +++ addrs[i] = new_insn - first_insn; ++ ++ switch (fp->code) { ++ /* All arithmetic insns and skb loads map as-is. */ ++@@ -429,6 +623,22 @@ do_pass: ++ BPF_MODE(fp->code) == BPF_ABS && ++ convert_bpf_extensions(fp, &insn)) ++ break; +++ if (BPF_CLASS(fp->code) == BPF_LD && +++ convert_bpf_ld_abs(fp, &insn)) { +++ *seen_ld_abs = true; +++ break; +++ } +++ +++ if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) || +++ fp->code == (BPF_ALU | BPF_MOD | BPF_X)) { +++ *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X); +++ /* Error with exception code on div/mod by 0. +++ * For cBPF programs, this was always return 0. +++ */ +++ *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2); +++ *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A); +++ *insn++ = BPF_EXIT_INSN(); +++ } ++ ++ *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k); ++ break; ++@@ -441,11 +651,18 @@ do_pass: ++ ++ #define BPF_EMIT_JMP \ ++ do { \ +++ const s32 off_min = S16_MIN, off_max = S16_MAX; \ +++ s32 off; \ +++ \ ++ if (target >= len || target < 0) \ ++ goto err; \ ++- insn->off = addrs ? 
addrs[target] - addrs[i] - 1 : 0; \ +++ off = addrs ? addrs[target] - addrs[i] - 1 : 0; \ ++ /* Adjust pc relative offset for 2nd or 3rd insn. */ \ ++- insn->off -= insn - tmp_insns; \ +++ off -= insn - tmp_insns; \ +++ /* Reject anything not fitting into insn->off. */ \ +++ if (off < off_min || off > off_max) \ +++ goto err; \ +++ insn->off = off; \ ++ } while (0) ++ ++ case BPF_JMP | BPF_JA: ++@@ -487,14 +704,27 @@ do_pass: ++ break; ++ } ++ ++- /* Convert JEQ into JNE when 'jump_true' is next insn. */ ++- if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) { ++- insn->code = BPF_JMP | BPF_JNE | bpf_src; +++ /* Convert some jumps when 'jump_true' is next insn. */ +++ if (fp->jt == 0) { +++ switch (BPF_OP(fp->code)) { +++ case BPF_JEQ: +++ insn->code = BPF_JMP | BPF_JNE | bpf_src; +++ break; +++ case BPF_JGT: +++ insn->code = BPF_JMP | BPF_JLE | bpf_src; +++ break; +++ case BPF_JGE: +++ insn->code = BPF_JMP | BPF_JLT | bpf_src; +++ break; +++ default: +++ goto jmp_rest; +++ } +++ ++ target = i + fp->jf + 1; ++ BPF_EMIT_JMP; ++ break; ++ } ++- +++jmp_rest: ++ /* Other jumps are mapped into two insns: Jxx and JA. */ ++ target = i + fp->jt + 1; ++ insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; ++@@ -507,44 +737,64 @@ do_pass: ++ break; ++ ++ /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */ ++- case BPF_LDX | BPF_MSH | BPF_B: ++- /* tmp = A */ ++- *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A); +++ case BPF_LDX | BPF_MSH | BPF_B: { +++ struct sock_filter tmp = { +++ .code = BPF_LD | BPF_ABS | BPF_B, +++ .k = fp->k, +++ }; +++ +++ *seen_ld_abs = true; +++ +++ /* X = A */ +++ *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); ++ /* A = BPF_R0 = *(u8 *) (skb->data + K) */ ++- *insn++ = BPF_LD_ABS(BPF_B, fp->k); +++ convert_bpf_ld_abs(&tmp, &insn); +++ insn++; ++ /* A &= 0xf */ ++ *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf); ++ /* A <<= 2 */ ++ *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2); +++ /* tmp = X */ +++ *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_X); ++ /* X = A */ ++ *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A); ++ /* A = tmp */ ++ *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP); ++ break; ++- ++- /* RET_K, RET_A are remaped into 2 insns. */ +++ } +++ /* RET_K is remaped into 2 insns. RET_A case doesn't need an +++ * extra mov as BPF_REG_0 is already mapped into BPF_REG_A. +++ */ ++ case BPF_RET | BPF_A: ++ case BPF_RET | BPF_K: ++- *insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ? ++- BPF_K : BPF_X, BPF_REG_0, ++- BPF_REG_A, fp->k); +++ if (BPF_RVAL(fp->code) == BPF_K) +++ *insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0, +++ 0, fp->k); ++ *insn = BPF_EXIT_INSN(); ++ break; ++ ++ /* Store to stack. */ ++ case BPF_ST: ++ case BPF_STX: +++ stack_off = fp->k * 4 + 4; ++ *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) == ++ BPF_ST ? BPF_REG_A : BPF_REG_X, ++- -(BPF_MEMWORDS - fp->k) * 4); +++ -stack_off); +++ /* check_load_and_stores() verifies that classic BPF can +++ * load from stack only after write, so tracking +++ * stack_depth for ST|STX insns is enough +++ */ +++ if (new_prog && new_prog->aux->stack_depth < stack_off) +++ new_prog->aux->stack_depth = stack_off; ++ break; ++ ++ /* Load from stack. */ ++ case BPF_LD | BPF_MEM: ++ case BPF_LDX | BPF_MEM: +++ stack_off = fp->k * 4 + 4; ++ *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ? ++ BPF_REG_A : BPF_REG_X, BPF_REG_FP, ++- -(BPF_MEMWORDS - fp->k) * 4); +++ -stack_off); ++ break; ++ ++ /* A = K or X = K */ ++@@ -592,13 +842,15 @@ do_pass: ++ ++ if (!new_prog) { ++ /* Only calculating new length. 
*/ ++- *new_len = new_insn - new_prog; +++ *new_len = new_insn - first_insn; +++ if (*seen_ld_abs) +++ *new_len += 4; /* Prologue bits. */ ++ return 0; ++ } ++ ++ pass++; ++- if (new_flen != new_insn - new_prog) { ++- new_flen = new_insn - new_prog; +++ if (new_flen != new_insn - first_insn) { +++ new_flen = new_insn - first_insn; ++ if (pass > 2) ++ goto err; ++ goto do_pass; ++@@ -738,6 +990,17 @@ static bool chk_code_allowed(u16 code_to ++ return codes[code_to_probe]; ++ } ++ +++static bool bpf_check_basics_ok(const struct sock_filter *filter, +++ unsigned int flen) +++{ +++ if (filter == NULL) +++ return false; +++ if (flen == 0 || flen > BPF_MAXINSNS) +++ return false; +++ +++ return true; +++} +++ ++ /** ++ * bpf_check_classic - verify socket filter code ++ * @filter: filter to verify ++@@ -758,9 +1021,6 @@ static int bpf_check_classic(const struc ++ bool anc_found; ++ int pc; ++ ++- if (flen == 0 || flen > BPF_MAXINSNS) ++- return -EINVAL; ++- ++ /* Check the filter code now */ ++ for (pc = 0; pc < flen; pc++) { ++ const struct sock_filter *ftest = &filter[pc]; ++@@ -901,7 +1161,7 @@ static void sk_filter_release_rcu(struct ++ */ ++ static void sk_filter_release(struct sk_filter *fp) ++ { ++- if (atomic_dec_and_test(&fp->refcnt)) +++ if (refcount_dec_and_test(&fp->refcnt)) ++ call_rcu(&fp->rcu, sk_filter_release_rcu); ++ } ++ ++@@ -916,25 +1176,37 @@ void sk_filter_uncharge(struct sock *sk, ++ /* try to charge the socket memory if there is space available ++ * return true on success ++ */ ++-bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) +++static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp) ++ { ++ u32 filter_size = bpf_prog_size(fp->prog->len); ++ ++ /* same check as in sock_kmalloc() */ ++ if (filter_size <= sysctl_optmem_max && ++ atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) { ++- atomic_inc(&fp->refcnt); ++ atomic_add(filter_size, &sk->sk_omem_alloc); ++ return true; ++ } ++ return false; ++ } ++ +++bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) +++{ +++ if (!refcount_inc_not_zero(&fp->refcnt)) +++ return false; +++ +++ if (!__sk_filter_charge(sk, fp)) { +++ sk_filter_release(fp); +++ return false; +++ } +++ return true; +++} +++ ++ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) ++ { ++ struct sock_filter *old_prog; ++ struct bpf_prog *old_fp; ++ int err, new_len, old_len = fp->len; +++ bool seen_ld_abs = false; ++ ++ /* We are free to overwrite insns et al right here as it ++ * won't be used at this point in time anymore internally ++@@ -956,7 +1228,8 @@ static struct bpf_prog *bpf_migrate_filt ++ } ++ ++ /* 1st pass: calculate the new program length. */ ++- err = bpf_convert_filter(old_prog, old_len, NULL, &new_len); +++ err = bpf_convert_filter(old_prog, old_len, NULL, &new_len, +++ &seen_ld_abs); ++ if (err) ++ goto out_err_free; ++ ++@@ -975,7 +1248,8 @@ static struct bpf_prog *bpf_migrate_filt ++ fp->len = new_len; ++ ++ /* 2nd pass: remap sock_filter insns into bpf_insn insns. */ ++- err = bpf_convert_filter(old_prog, old_len, fp->insnsi, &new_len); +++ err = bpf_convert_filter(old_prog, old_len, fp, &new_len, +++ &seen_ld_abs); ++ if (err) ++ /* 2nd bpf_convert_filter() can fail only if it fails ++ * to allocate memory, remapping must succeed. 
Note, ++@@ -984,7 +1258,9 @@ static struct bpf_prog *bpf_migrate_filt ++ */ ++ goto out_err_free; ++ ++- bpf_prog_select_runtime(fp); +++ fp = bpf_prog_select_runtime(fp, &err); +++ if (err) +++ goto out_err_free; ++ ++ kfree(old_prog); ++ return fp; ++@@ -1051,7 +1327,7 @@ int bpf_prog_create(struct bpf_prog **pf ++ struct bpf_prog *fp; ++ ++ /* Make sure new filter is there and in the right amounts. */ ++- if (fprog->filter == NULL) +++ if (!bpf_check_basics_ok(fprog->filter, fprog->len)) ++ return -EINVAL; ++ ++ fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); ++@@ -1098,7 +1374,7 @@ int bpf_prog_create_from_user(struct bpf ++ int err; ++ ++ /* Make sure new filter is there and in the right amounts. */ ++- if (fprog->filter == NULL) +++ if (!bpf_check_basics_ok(fprog->filter, fprog->len)) ++ return -EINVAL; ++ ++ fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); ++@@ -1139,8 +1415,7 @@ void bpf_prog_destroy(struct bpf_prog *f ++ } ++ EXPORT_SYMBOL_GPL(bpf_prog_destroy); ++ ++-static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk, ++- bool locked) +++static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) ++ { ++ struct sk_filter *fp, *old_fp; ++ ++@@ -1149,53 +1424,44 @@ static int __sk_attach_prog(struct bpf_p ++ return -ENOMEM; ++ ++ fp->prog = prog; ++- atomic_set(&fp->refcnt, 0); ++ ++- if (!sk_filter_charge(sk, fp)) { +++ if (!__sk_filter_charge(sk, fp)) { ++ kfree(fp); ++ return -ENOMEM; ++ } +++ refcount_set(&fp->refcnt, 1); ++ ++- old_fp = rcu_dereference_protected(sk->sk_filter, locked); +++ old_fp = rcu_dereference_protected(sk->sk_filter, +++ lockdep_sock_is_held(sk)); ++ rcu_assign_pointer(sk->sk_filter, fp); +++ ++ if (old_fp) ++ sk_filter_uncharge(sk, old_fp); ++ ++ return 0; ++ } ++ ++-/** ++- * sk_attach_filter - attach a socket filter ++- * @fprog: the filter program ++- * @sk: the socket to use ++- * ++- * Attach the user's filter code. We first run some sanity checks on ++- * it to make sure it does not explode on us later. If an error ++- * occurs or there is insufficient memory for the filter a negative ++- * errno code is returned. On success the return is zero. ++- */ ++-int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, ++- bool locked) +++static +++struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk) ++ { ++ unsigned int fsize = bpf_classic_proglen(fprog); ++- unsigned int bpf_fsize = bpf_prog_size(fprog->len); ++ struct bpf_prog *prog; ++ int err; ++ ++ if (sock_flag(sk, SOCK_FILTER_LOCKED)) ++- return -EPERM; +++ return ERR_PTR(-EPERM); ++ ++ /* Make sure new filter is there and in the right amounts. */ ++- if (fprog->filter == NULL) ++- return -EINVAL; +++ if (!bpf_check_basics_ok(fprog->filter, fprog->len)) +++ return ERR_PTR(-EINVAL); ++ ++- prog = bpf_prog_alloc(bpf_fsize, 0); +++ prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); ++ if (!prog) ++- return -ENOMEM; +++ return ERR_PTR(-ENOMEM); ++ ++ if (copy_from_user(prog->insns, fprog->filter, fsize)) { ++ __bpf_prog_free(prog); ++- return -EFAULT; +++ return ERR_PTR(-EINVAL); ++ } ++ ++ prog->len = fprog->len; ++@@ -1203,17 +1469,34 @@ int __sk_attach_filter(struct sock_fprog ++ err = bpf_prog_store_orig_filter(prog, fprog); ++ if (err) { ++ __bpf_prog_free(prog); ++- return -ENOMEM; +++ return ERR_PTR(-ENOMEM); ++ } ++ ++ /* bpf_prepare_filter() already takes care of freeing ++ * memory in case something goes wrong. 
++ */ ++- prog = bpf_prepare_filter(prog, NULL); +++ return bpf_prepare_filter(prog, NULL); +++} +++ +++/** +++ * sk_attach_filter - attach a socket filter +++ * @fprog: the filter program +++ * @sk: the socket to use +++ * +++ * Attach the user's filter code. We first run some sanity checks on +++ * it to make sure it does not explode on us later. If an error +++ * occurs or there is insufficient memory for the filter a negative +++ * errno code is returned. On success the return is zero. +++ */ +++int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +++{ +++ struct bpf_prog *prog = __get_filter(fprog, sk); +++ int err; +++ ++ if (IS_ERR(prog)) ++ return PTR_ERR(prog); ++ ++- err = __sk_attach_prog(prog, sk, locked); +++ err = __sk_attach_prog(prog, sk); ++ if (err < 0) { ++ __bpf_prog_release(prog); ++ return err; ++@@ -1221,31 +1504,25 @@ int __sk_attach_filter(struct sock_fprog ++ ++ return 0; ++ } ++-EXPORT_SYMBOL_GPL(__sk_attach_filter); +++EXPORT_SYMBOL_GPL(sk_attach_filter); ++ ++-int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +++static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk) ++ { ++- return __sk_attach_filter(fprog, sk, sock_owned_by_user(sk)); +++ if (sock_flag(sk, SOCK_FILTER_LOCKED)) +++ return ERR_PTR(-EPERM); +++ +++ return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER); ++ } ++ ++ int sk_attach_bpf(u32 ufd, struct sock *sk) ++ { ++- struct bpf_prog *prog; +++ struct bpf_prog *prog = __get_bpf(ufd, sk); ++ int err; ++ ++- if (sock_flag(sk, SOCK_FILTER_LOCKED)) ++- return -EPERM; ++- ++- prog = bpf_prog_get(ufd); ++ if (IS_ERR(prog)) ++ return PTR_ERR(prog); ++ ++- if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) { ++- bpf_prog_put(prog); ++- return -EINVAL; ++- } ++- ++- err = __sk_attach_prog(prog, sk, sock_owned_by_user(sk)); +++ err = __sk_attach_prog(prog, sk); ++ if (err < 0) { ++ bpf_prog_put(prog); ++ return err; ++@@ -1254,79 +1531,201 @@ int sk_attach_bpf(u32 ufd, struct sock * ++ return 0; ++ } ++ ++-#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1) +++struct bpf_scratchpad { +++ union { +++ __be32 diff[MAX_BPF_STACK / sizeof(__be32)]; +++ u8 buff[MAX_BPF_STACK]; +++ }; +++}; +++ +++static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp); +++ +++static inline int __bpf_try_make_writable(struct sk_buff *skb, +++ unsigned int write_len) +++{ +++ return skb_ensure_writable(skb, write_len); +++} +++ +++static inline int bpf_try_make_writable(struct sk_buff *skb, +++ unsigned int write_len) +++{ +++ int err = __bpf_try_make_writable(skb, write_len); +++ +++ bpf_compute_data_pointers(skb); +++ return err; +++} +++ +++static int bpf_try_make_head_writable(struct sk_buff *skb) +++{ +++ return bpf_try_make_writable(skb, skb_headlen(skb)); +++} ++ ++-static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) +++static inline void bpf_push_mac_rcsum(struct sk_buff *skb) +++{ +++ if (skb_at_tc_ingress(skb)) +++ skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len); +++} +++ +++static inline void bpf_pull_mac_rcsum(struct sk_buff *skb) +++{ +++ if (skb_at_tc_ingress(skb)) +++ skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len); +++} +++ +++BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset, +++ const void *, from, u32, len, u64, flags) ++ { ++- struct sk_buff *skb = (struct sk_buff *) (long) r1; ++- int offset = (int) r2; ++- void *from = (void *) (long) r3; ++- unsigned int len = (unsigned int) r4; ++- char buf[16]; ++ void *ptr; ++ ++- /* bpf verifier guarantees that: ++- * 'from' pointer points 
to bpf program stack ++- * 'len' bytes of it were initialized ++- * 'len' > 0 ++- * 'skb' is a valid pointer to 'struct sk_buff' ++- * ++- * so check for invalid 'offset' and too large 'len' ++- */ ++- if (unlikely((u32) offset > 0xffff || len > sizeof(buf))) +++ if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH))) +++ return -EINVAL; +++ if (unlikely(offset > 0xffff)) ++ return -EFAULT; ++- if (unlikely(skb_try_make_writable(skb, offset + len))) +++ if (unlikely(bpf_try_make_writable(skb, offset + len))) ++ return -EFAULT; ++ ++- ptr = skb_header_pointer(skb, offset, len, buf); ++- if (unlikely(!ptr)) ++- return -EFAULT; ++- ++- if (BPF_RECOMPUTE_CSUM(flags)) ++- skb_postpull_rcsum(skb, ptr, len); +++ ptr = skb->data + offset; +++ if (flags & BPF_F_RECOMPUTE_CSUM) +++ __skb_postpull_rcsum(skb, ptr, len, offset); ++ ++ memcpy(ptr, from, len); ++ ++- if (ptr == buf) ++- /* skb_store_bits cannot return -EFAULT here */ ++- skb_store_bits(skb, offset, ptr, len); +++ if (flags & BPF_F_RECOMPUTE_CSUM) +++ __skb_postpush_rcsum(skb, ptr, len, offset); +++ if (flags & BPF_F_INVALIDATE_HASH) +++ skb_clear_hash(skb); ++ ++- if (BPF_RECOMPUTE_CSUM(flags) && skb->ip_summed == CHECKSUM_COMPLETE) ++- skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0)); ++ return 0; ++ } ++ ++-const struct bpf_func_proto bpf_skb_store_bytes_proto = { +++static const struct bpf_func_proto bpf_skb_store_bytes_proto = { ++ .func = bpf_skb_store_bytes, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++ .arg2_type = ARG_ANYTHING, ++- .arg3_type = ARG_PTR_TO_STACK, ++- .arg4_type = ARG_CONST_STACK_SIZE, +++ .arg3_type = ARG_PTR_TO_MEM, +++ .arg4_type = ARG_CONST_SIZE, ++ .arg5_type = ARG_ANYTHING, ++ }; ++ ++-#define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f) ++-#define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 0x10) +++BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset, +++ void *, to, u32, len) +++{ +++ void *ptr; +++ +++ if (unlikely(offset > 0xffff)) +++ goto err_clear; +++ +++ ptr = skb_header_pointer(skb, offset, len, to); +++ if (unlikely(!ptr)) +++ goto err_clear; +++ if (ptr != to) +++ memcpy(to, ptr, len); ++ ++-static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) +++ return 0; +++err_clear: +++ memset(to, 0, len); +++ return -EFAULT; +++} +++ +++static const struct bpf_func_proto bpf_skb_load_bytes_proto = { +++ .func = bpf_skb_load_bytes, +++ .gpl_only = false, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_PTR_TO_CTX, +++ .arg2_type = ARG_ANYTHING, +++ .arg3_type = ARG_PTR_TO_UNINIT_MEM, +++ .arg4_type = ARG_CONST_SIZE, +++}; +++ +++BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb, +++ u32, offset, void *, to, u32, len, u32, start_header) ++ { ++- struct sk_buff *skb = (struct sk_buff *) (long) r1; ++- int offset = (int) r2; ++- __sum16 sum, *ptr; +++ u8 *end = skb_tail_pointer(skb); +++ u8 *start, *ptr; ++ ++- if (unlikely((u32) offset > 0xffff)) ++- return -EFAULT; +++ if (unlikely(offset > 0xffff)) +++ goto err_clear; ++ ++- if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum)))) ++- return -EFAULT; +++ switch (start_header) { +++ case BPF_HDR_START_MAC: +++ if (unlikely(!skb_mac_header_was_set(skb))) +++ goto err_clear; +++ start = skb_mac_header(skb); +++ break; +++ case BPF_HDR_START_NET: +++ start = skb_network_header(skb); +++ break; +++ default: +++ goto err_clear; +++ } ++ ++- ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); ++- if (unlikely(!ptr)) +++ ptr = start + 
offset; +++ +++ if (likely(ptr + len <= end)) { +++ memcpy(to, ptr, len); +++ return 0; +++ } +++ +++err_clear: +++ memset(to, 0, len); +++ return -EFAULT; +++} +++ +++static const struct bpf_func_proto bpf_skb_load_bytes_relative_proto = { +++ .func = bpf_skb_load_bytes_relative, +++ .gpl_only = false, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_PTR_TO_CTX, +++ .arg2_type = ARG_ANYTHING, +++ .arg3_type = ARG_PTR_TO_UNINIT_MEM, +++ .arg4_type = ARG_CONST_SIZE, +++ .arg5_type = ARG_ANYTHING, +++}; +++ +++BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len) +++{ +++ /* Idea is the following: should the needed direct read/write +++ * test fail during runtime, we can pull in more data and redo +++ * again, since implicitly, we invalidate previous checks here. +++ * +++ * Or, since we know how much we need to make read/writeable, +++ * this can be done once at the program beginning for direct +++ * access case. By this we overcome limitations of only current +++ * headroom being accessible. +++ */ +++ return bpf_try_make_writable(skb, len ? : skb_headlen(skb)); +++} +++ +++static const struct bpf_func_proto bpf_skb_pull_data_proto = { +++ .func = bpf_skb_pull_data, +++ .gpl_only = false, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_PTR_TO_CTX, +++ .arg2_type = ARG_ANYTHING, +++}; +++ +++BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset, +++ u64, from, u64, to, u64, flags) +++{ +++ __sum16 *ptr; +++ +++ if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK))) +++ return -EINVAL; +++ if (unlikely(offset > 0xffff || offset & 1)) +++ return -EFAULT; +++ if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr)))) ++ return -EFAULT; ++ ++- switch (BPF_HEADER_FIELD_SIZE(flags)) { +++ ptr = (__sum16 *)(skb->data + offset); +++ switch (flags & BPF_F_HDR_FIELD_MASK) { +++ case 0: +++ if (unlikely(from != 0)) +++ return -EINVAL; +++ +++ csum_replace_by_diff(ptr, to); +++ break; ++ case 2: ++ csum_replace2(ptr, from, to); ++ break; ++@@ -1337,14 +1736,10 @@ static u64 bpf_l3_csum_replace(u64 r1, u ++ return -EINVAL; ++ } ++ ++- if (ptr == &sum) ++- /* skb_store_bits guaranteed to not return -EFAULT here */ ++- skb_store_bits(skb, offset, ptr, sizeof(sum)); ++- ++ return 0; ++ } ++ ++-const struct bpf_func_proto bpf_l3_csum_replace_proto = { +++static const struct bpf_func_proto bpf_l3_csum_replace_proto = { ++ .func = bpf_l3_csum_replace, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++@@ -1355,23 +1750,33 @@ const struct bpf_func_proto bpf_l3_csum_ ++ .arg5_type = ARG_ANYTHING, ++ }; ++ ++-static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) +++BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset, +++ u64, from, u64, to, u64, flags) ++ { ++- struct sk_buff *skb = (struct sk_buff *) (long) r1; ++- bool is_pseudo = !!BPF_IS_PSEUDO_HEADER(flags); ++- int offset = (int) r2; ++- __sum16 sum, *ptr; +++ bool is_pseudo = flags & BPF_F_PSEUDO_HDR; +++ bool is_mmzero = flags & BPF_F_MARK_MANGLED_0; +++ bool do_mforce = flags & BPF_F_MARK_ENFORCE; +++ __sum16 *ptr; ++ ++- if (unlikely((u32) offset > 0xffff)) +++ if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE | +++ BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK))) +++ return -EINVAL; +++ if (unlikely(offset > 0xffff || offset & 1)) ++ return -EFAULT; ++- if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum)))) +++ if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr)))) ++ return -EFAULT; ++ ++- ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); ++- if 
(unlikely(!ptr)) ++- return -EFAULT; +++ ptr = (__sum16 *)(skb->data + offset); +++ if (is_mmzero && !do_mforce && !*ptr) +++ return 0; ++ ++- switch (BPF_HEADER_FIELD_SIZE(flags)) { +++ switch (flags & BPF_F_HDR_FIELD_MASK) { +++ case 0: +++ if (unlikely(from != 0)) +++ return -EINVAL; +++ +++ inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo); +++ break; ++ case 2: ++ inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); ++ break; ++@@ -1382,14 +1787,12 @@ static u64 bpf_l4_csum_replace(u64 r1, u ++ return -EINVAL; ++ } ++ ++- if (ptr == &sum) ++- /* skb_store_bits guaranteed to not return -EFAULT here */ ++- skb_store_bits(skb, offset, ptr, sizeof(sum)); ++- +++ if (is_mmzero && !*ptr) +++ *ptr = CSUM_MANGLED_0; ++ return 0; ++ } ++ ++-const struct bpf_func_proto bpf_l4_csum_replace_proto = { +++static const struct bpf_func_proto bpf_l4_csum_replace_proto = { ++ .func = bpf_l4_csum_replace, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++@@ -1400,30 +1803,172 @@ const struct bpf_func_proto bpf_l4_csum_ ++ .arg5_type = ARG_ANYTHING, ++ }; ++ ++-#define BPF_IS_REDIRECT_INGRESS(flags) ((flags) & 1) +++BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size, +++ __be32 *, to, u32, to_size, __wsum, seed) +++{ +++ struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); +++ u32 diff_size = from_size + to_size; +++ int i, j = 0; +++ +++ /* This is quite flexible, some examples: +++ * +++ * from_size == 0, to_size > 0, seed := csum --> pushing data +++ * from_size > 0, to_size == 0, seed := csum --> pulling data +++ * from_size > 0, to_size > 0, seed := 0 --> diffing data +++ * +++ * Even for diffing, from_size and to_size don't need to be equal. +++ */ +++ if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) || +++ diff_size > sizeof(sp->diff))) +++ return -EINVAL; +++ +++ for (i = 0; i < from_size / sizeof(__be32); i++, j++) +++ sp->diff[j] = ~from[i]; +++ for (i = 0; i < to_size / sizeof(__be32); i++, j++) +++ sp->diff[j] = to[i]; +++ +++ return csum_partial(sp->diff, diff_size, seed); +++} +++ +++static const struct bpf_func_proto bpf_csum_diff_proto = { +++ .func = bpf_csum_diff, +++ .gpl_only = false, +++ .pkt_access = true, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_PTR_TO_MEM_OR_NULL, +++ .arg2_type = ARG_CONST_SIZE_OR_ZERO, +++ .arg3_type = ARG_PTR_TO_MEM_OR_NULL, +++ .arg4_type = ARG_CONST_SIZE_OR_ZERO, +++ .arg5_type = ARG_ANYTHING, +++}; +++ +++BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum) +++{ +++ /* The interface is to be used in combination with bpf_csum_diff() +++ * for direct packet writes. csum rotation for alignment as well +++ * as emulating csum_sub() can be done from the eBPF program. 
+++ */ +++ if (skb->ip_summed == CHECKSUM_COMPLETE) +++ return (skb->csum = csum_add(skb->csum, csum)); +++ +++ return -ENOTSUPP; +++} +++ +++static const struct bpf_func_proto bpf_csum_update_proto = { +++ .func = bpf_csum_update, +++ .gpl_only = false, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_PTR_TO_CTX, +++ .arg2_type = ARG_ANYTHING, +++}; +++ +++static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb) +++{ +++ return dev_forward_skb(dev, skb); +++} +++ +++static inline int __bpf_rx_skb_no_mac(struct net_device *dev, +++ struct sk_buff *skb) +++{ +++ int ret = ____dev_forward_skb(dev, skb); +++ +++ if (likely(!ret)) { +++ skb->dev = dev; +++ ret = netif_rx(skb); +++ } +++ +++ return ret; +++} +++ +++static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) +++{ +++ int ret; +++ +++ skb->dev = dev; +++ skb->tstamp.tv64 = 0; +++ +++ ret = dev_queue_xmit(skb); +++ +++ return ret; +++} +++ +++static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev, +++ u32 flags) +++{ +++ unsigned int mlen = skb_network_offset(skb); +++ +++ if (mlen) { +++ __skb_pull(skb, mlen); +++ +++ /* At ingress, the mac header has already been pulled once. +++ * At egress, skb_pospull_rcsum has to be done in case that +++ * the skb is originated from ingress (i.e. a forwarded skb) +++ * to ensure that rcsum starts at net header. +++ */ +++ if (!skb_at_tc_ingress(skb)) +++ skb_postpull_rcsum(skb, skb_mac_header(skb), mlen); +++ } +++ skb_pop_mac_header(skb); +++ skb_reset_mac_len(skb); +++ return flags & BPF_F_INGRESS ? +++ __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb); +++} ++ ++-static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) +++static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev, +++ u32 flags) +++{ +++ /* Verify that a link layer header is carried */ +++ if (unlikely(skb->mac_header >= skb->network_header)) { +++ kfree_skb(skb); +++ return -ERANGE; +++ } +++ +++ bpf_push_mac_rcsum(skb); +++ return flags & BPF_F_INGRESS ? +++ __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); +++} +++ +++static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev, +++ u32 flags) +++{ +++ if (dev_is_mac_header_xmit(dev)) +++ return __bpf_redirect_common(skb, dev, flags); +++ else +++ return __bpf_redirect_no_mac(skb, dev, flags); +++} +++ +++BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) ++ { ++- struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2; ++ struct net_device *dev; +++ struct sk_buff *clone; +++ int ret; +++ +++ if (unlikely(flags & ~(BPF_F_INGRESS))) +++ return -EINVAL; ++ ++ dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex); ++ if (unlikely(!dev)) ++ return -EINVAL; ++ ++- skb2 = skb_clone(skb, GFP_ATOMIC); ++- if (unlikely(!skb2)) +++ clone = skb_clone(skb, GFP_ATOMIC); +++ if (unlikely(!clone)) ++ return -ENOMEM; ++ ++- if (BPF_IS_REDIRECT_INGRESS(flags)) ++- return dev_forward_skb(dev, skb2); +++ /* For direct write, we need to keep the invariant that the skbs +++ * we're dealing with need to be uncloned. Should uncloning fail +++ * here, we need to free the just generated clone to unclone once +++ * again. 
+++ */ +++ ret = bpf_try_make_head_writable(skb); +++ if (unlikely(ret)) { +++ kfree_skb(clone); +++ return -ENOMEM; +++ } ++ ++- skb2->dev = dev; ++- skb_sender_cpu_clear(skb2); ++- return dev_queue_xmit(skb2); +++ return __bpf_redirect(clone, dev, flags); ++ } ++ ++-const struct bpf_func_proto bpf_clone_redirect_proto = { +++static const struct bpf_func_proto bpf_clone_redirect_proto = { ++ .func = bpf_clone_redirect, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++@@ -1432,42 +1977,38 @@ const struct bpf_func_proto bpf_clone_re ++ .arg3_type = ARG_ANYTHING, ++ }; ++ ++-struct redirect_info { ++- u32 ifindex; ++- u32 flags; ++-}; +++DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); +++EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info); ++ ++-static DEFINE_PER_CPU(struct redirect_info, redirect_info); ++-static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5) +++BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags) ++ { ++- struct redirect_info *ri = this_cpu_ptr(&redirect_info); +++ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); +++ +++ if (unlikely(flags & ~(BPF_F_INGRESS))) +++ return TC_ACT_SHOT; ++ ++- ri->ifindex = ifindex; ++ ri->flags = flags; +++ ri->tgt_index = ifindex; +++ ++ return TC_ACT_REDIRECT; ++ } ++ ++ int skb_do_redirect(struct sk_buff *skb) ++ { ++- struct redirect_info *ri = this_cpu_ptr(&redirect_info); +++ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); ++ struct net_device *dev; ++ ++- dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex); ++- ri->ifindex = 0; +++ dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->tgt_index); +++ ri->tgt_index = 0; ++ if (unlikely(!dev)) { ++ kfree_skb(skb); ++ return -EINVAL; ++ } ++ ++- if (BPF_IS_REDIRECT_INGRESS(ri->flags)) ++- return dev_forward_skb(dev, skb); ++- ++- skb->dev = dev; ++- skb_sender_cpu_clear(skb); ++- return dev_queue_xmit(skb); +++ return __bpf_redirect(skb, dev, ri->flags); ++ } ++ ++-const struct bpf_func_proto bpf_redirect_proto = { +++static const struct bpf_func_proto bpf_redirect_proto = { ++ .func = bpf_redirect, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++@@ -1475,50 +2016,75 @@ const struct bpf_func_proto bpf_redirect ++ .arg2_type = ARG_ANYTHING, ++ }; ++ ++-static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +++BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb) ++ { ++- return task_get_classid((struct sk_buff *) (unsigned long) r1); +++ /* If skb_clear_hash() was called due to mangling, we can +++ * trigger SW recalculation here. Later access to hash +++ * can then use the inline skb->hash via context directly +++ * instead of calling this helper again. +++ */ +++ return skb_get_hash(skb); ++ } ++ ++-static const struct bpf_func_proto bpf_get_cgroup_classid_proto = { ++- .func = bpf_get_cgroup_classid, ++- .gpl_only = false, ++- .ret_type = RET_INTEGER, ++- .arg1_type = ARG_PTR_TO_CTX, +++static const struct bpf_func_proto bpf_get_hash_recalc_proto = { +++ .func = bpf_get_hash_recalc, +++ .gpl_only = false, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_PTR_TO_CTX, ++ }; ++ ++-static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +++BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb) ++ { ++-#ifdef CONFIG_IP_ROUTE_CLASSID ++- const struct dst_entry *dst; +++ /* After all direct packet write, this can be used once for +++ * triggering a lazy recalc on next skb_get_hash() invocation. 
+++ */ +++ skb_clear_hash(skb); +++ return 0; +++} ++ ++- dst = skb_dst((struct sk_buff *) (unsigned long) r1); ++- if (dst) ++- return dst->tclassid; ++-#endif +++static const struct bpf_func_proto bpf_set_hash_invalid_proto = { +++ .func = bpf_set_hash_invalid, +++ .gpl_only = false, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_PTR_TO_CTX, +++}; +++ +++BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash) +++{ +++ /* Set user specified hash as L4(+), so that it gets returned +++ * on skb_get_hash() call unless BPF prog later on triggers a +++ * skb_clear_hash(). +++ */ +++ __skb_set_sw_hash(skb, hash, true); ++ return 0; ++ } ++ ++-static const struct bpf_func_proto bpf_get_route_realm_proto = { ++- .func = bpf_get_route_realm, ++- .gpl_only = false, ++- .ret_type = RET_INTEGER, ++- .arg1_type = ARG_PTR_TO_CTX, +++static const struct bpf_func_proto bpf_set_hash_proto = { +++ .func = bpf_set_hash, +++ .gpl_only = false, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_PTR_TO_CTX, +++ .arg2_type = ARG_ANYTHING, ++ }; ++ ++-static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5) +++BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto, +++ u16, vlan_tci) ++ { ++- struct sk_buff *skb = (struct sk_buff *) (long) r1; ++- __be16 vlan_proto = (__force __be16) r2; +++ int ret; ++ ++ if (unlikely(vlan_proto != htons(ETH_P_8021Q) && ++ vlan_proto != htons(ETH_P_8021AD))) ++ vlan_proto = htons(ETH_P_8021Q); ++ ++- return skb_vlan_push(skb, vlan_proto, vlan_tci); +++ bpf_push_mac_rcsum(skb); +++ ret = skb_vlan_push(skb, vlan_proto, vlan_tci); +++ bpf_pull_mac_rcsum(skb); +++ +++ bpf_compute_data_pointers(skb); +++ return ret; ++ } ++ ++-const struct bpf_func_proto bpf_skb_vlan_push_proto = { +++static const struct bpf_func_proto bpf_skb_vlan_push_proto = { ++ .func = bpf_skb_vlan_push, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++@@ -1526,116 +2092,401 @@ const struct bpf_func_proto bpf_skb_vlan ++ .arg2_type = ARG_ANYTHING, ++ .arg3_type = ARG_ANYTHING, ++ }; ++-EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto); ++ ++-static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +++BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb) ++ { ++- struct sk_buff *skb = (struct sk_buff *) (long) r1; +++ int ret; +++ +++ bpf_push_mac_rcsum(skb); +++ ret = skb_vlan_pop(skb); +++ bpf_pull_mac_rcsum(skb); ++ ++- return skb_vlan_pop(skb); +++ bpf_compute_data_pointers(skb); +++ return ret; ++ } ++ ++-const struct bpf_func_proto bpf_skb_vlan_pop_proto = { +++static const struct bpf_func_proto bpf_skb_vlan_pop_proto = { ++ .func = bpf_skb_vlan_pop, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++ }; ++-EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto); ++ ++-bool bpf_helper_changes_skb_data(void *func) +++BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type) ++ { ++- if (func == bpf_skb_vlan_push) ++- return true; ++- if (func == bpf_skb_vlan_pop) ++- return true; ++- if (func == bpf_skb_store_bytes) ++- return true; ++- if (func == bpf_l3_csum_replace) ++- return true; ++- if (func == bpf_l4_csum_replace) ++- return true; +++ /* We only allow a restricted subset to be changed for now. 
*/ +++ if (unlikely(!skb_pkt_type_ok(skb->pkt_type) || +++ !skb_pkt_type_ok(pkt_type))) +++ return -EINVAL; ++ ++- return false; +++ skb->pkt_type = pkt_type; +++ return 0; +++} +++ +++static const struct bpf_func_proto bpf_skb_change_type_proto = { +++ .func = bpf_skb_change_type, +++ .gpl_only = false, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_PTR_TO_CTX, +++ .arg2_type = ARG_ANYTHING, +++}; +++ +++#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \ +++ BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) +++ +++#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \ +++ BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \ +++ BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \ +++ BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \ +++ BPF_F_ADJ_ROOM_ENCAP_L2( \ +++ BPF_ADJ_ROOM_ENCAP_L2_MASK)) +++ +++#define BPF_SKB_MAX_LEN SKB_MAX_ALLOC +++ +++static u32 __bpf_skb_min_len(const struct sk_buff *skb) +++{ +++ u32 min_len = skb_network_offset(skb); +++ +++ if (skb_transport_header_was_set(skb)) +++ min_len = skb_transport_offset(skb); +++ if (skb->ip_summed == CHECKSUM_PARTIAL) +++ min_len = skb_checksum_start_offset(skb) + +++ skb->csum_offset + sizeof(__sum16); +++ return min_len; ++ } ++ ++-static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) +++static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len) ++ { ++- struct sk_buff *skb = (struct sk_buff *) (long) r1; ++- struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2; ++- struct ip_tunnel_info *info = skb_tunnel_info(skb); +++ unsigned int old_len = skb->len; +++ int ret; ++ ++- if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags || !info)) +++ ret = __skb_grow_rcsum(skb, new_len); +++ if (!ret) +++ memset(skb->data + old_len, 0, new_len - old_len); +++ return ret; +++} +++ +++static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len) +++{ +++ return __skb_trim_rcsum(skb, new_len); +++} +++ +++static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len, +++ u64 flags) +++{ +++ u32 max_len = BPF_SKB_MAX_LEN; +++ u32 min_len = __bpf_skb_min_len(skb); +++ int ret; +++ +++ if (unlikely(flags || new_len > max_len || new_len < min_len)) ++ return -EINVAL; ++- if (ip_tunnel_info_af(info) != AF_INET) +++ if (skb->encapsulation) +++ return -ENOTSUPP; +++ +++ /* The basic idea of this helper is that it's performing the +++ * needed work to either grow or trim an skb, and eBPF program +++ * rewrites the rest via helpers like bpf_skb_store_bytes(), +++ * bpf_lX_csum_replace() and others rather than passing a raw +++ * buffer here. This one is a slow path helper and intended +++ * for replies with control messages. +++ * +++ * Like in bpf_skb_change_proto(), we want to keep this rather +++ * minimal and without protocol specifics so that we are able +++ * to separate concerns as in bpf_skb_store_bytes() should only +++ * be the one responsible for writing buffers. +++ * +++ * It's really expected to be a slow path operation here for +++ * control message replies, so we're implicitly linearizing, +++ * uncloning and drop offloads from the skb by this. 
+++ */ +++ ret = __bpf_try_make_writable(skb, skb->len); +++ if (!ret) { +++ if (new_len > skb->len) +++ ret = bpf_skb_grow_rcsum(skb, new_len); +++ else if (new_len < skb->len) +++ ret = bpf_skb_trim_rcsum(skb, new_len); +++ if (!ret && skb_is_gso(skb)) +++ skb_gso_reset(skb); +++ } +++ return ret; +++} +++ +++BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len, +++ u64, flags) +++{ +++ int ret = __bpf_skb_change_tail(skb, new_len, flags); +++ +++ bpf_compute_data_pointers(skb); +++ return ret; +++} +++ +++static const struct bpf_func_proto bpf_skb_change_tail_proto = { +++ .func = bpf_skb_change_tail, +++ .gpl_only = false, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_PTR_TO_CTX, +++ .arg2_type = ARG_ANYTHING, +++ .arg3_type = ARG_ANYTHING, +++}; +++ +++static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room, +++ u64 flags) +++{ +++ u32 max_len = BPF_SKB_MAX_LEN; +++ u32 new_len = skb->len + head_room; +++ int ret; +++ +++ if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) || +++ new_len < skb->len)) ++ return -EINVAL; ++ ++- to->tunnel_id = be64_to_cpu(info->key.tun_id); ++- to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); +++ ret = skb_cow(skb, head_room); +++ if (likely(!ret)) { +++ /* Idea for this helper is that we currently only +++ * allow to expand on mac header. This means that +++ * skb->protocol network header, etc, stay as is. +++ * Compared to bpf_skb_change_tail(), we're more +++ * flexible due to not needing to linearize or +++ * reset GSO. Intention for this helper is to be +++ * used by an L3 skb that needs to push mac header +++ * for redirection into L2 device. +++ */ +++ __skb_push(skb, head_room); +++ memset(skb->data, 0, head_room); +++ skb_reset_mac_header(skb); +++ skb_reset_mac_len(skb); +++ } ++ ++- return 0; +++ return ret; ++ } ++ ++-const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = { ++- .func = bpf_skb_get_tunnel_key, +++BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room, +++ u64, flags) +++{ +++ int ret = __bpf_skb_change_head(skb, head_room, flags); +++ +++ bpf_compute_data_pointers(skb); +++ return ret; +++} +++ +++static const struct bpf_func_proto bpf_skb_change_head_proto = { +++ .func = bpf_skb_change_head, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++- .arg2_type = ARG_PTR_TO_STACK, ++- .arg3_type = ARG_CONST_STACK_SIZE, ++- .arg4_type = ARG_ANYTHING, +++ .arg2_type = ARG_ANYTHING, +++ .arg3_type = ARG_ANYTHING, ++ }; ++ ++-static struct metadata_dst __percpu *md_dst; +++void bpf_clear_redirect_map(struct bpf_map *map) +++{ +++ struct bpf_redirect_info *ri; +++ int cpu; +++ +++ for_each_possible_cpu(cpu) { +++ ri = per_cpu_ptr(&bpf_redirect_info, cpu); +++ /* Avoid polluting remote cacheline due to writes if +++ * not needed. Once we pass this test, we need the +++ * cmpxchg() to make sure it hasn't been changed in +++ * the meantime by remote CPU. 
+++ */ +++ if (unlikely(READ_ONCE(ri->map) == map)) +++ cmpxchg(&ri->map, map, NULL); +++ } +++} +++ +++static unsigned long bpf_skb_copy(void *dst_buff, const void *skb, +++ unsigned long off, unsigned long len) +++{ +++ void *ptr = skb_header_pointer(skb, off, len, dst_buff); +++ +++ if (unlikely(!ptr)) +++ return len; +++ if (ptr != dst_buff) +++ memcpy(dst_buff, ptr, len); +++ +++ return 0; +++} ++ ++-static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) +++BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map, +++ u64, flags, void *, meta, u64, meta_size) ++ { ++- struct sk_buff *skb = (struct sk_buff *) (long) r1; ++- struct bpf_tunnel_key *from = (struct bpf_tunnel_key *) (long) r2; ++- struct metadata_dst *md = this_cpu_ptr(md_dst); ++- struct ip_tunnel_info *info; +++ u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32; ++ ++- if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags)) +++ if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) ++ return -EINVAL; +++ if (unlikely(skb_size > skb->len)) +++ return -EFAULT; ++ ++- skb_dst_drop(skb); ++- dst_hold((struct dst_entry *) md); ++- skb_dst_set(skb, (struct dst_entry *) md); +++ return bpf_event_output(map, flags, meta, meta_size, skb, skb_size, +++ bpf_skb_copy); +++} ++ ++- info = &md->u.tun_info; ++- info->mode = IP_TUNNEL_INFO_TX; ++- info->key.tun_flags = TUNNEL_KEY; ++- info->key.tun_id = cpu_to_be64(from->tunnel_id); ++- info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); +++static const struct bpf_func_proto bpf_skb_event_output_proto = { +++ .func = bpf_skb_event_output, +++ .gpl_only = true, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_PTR_TO_CTX, +++ .arg2_type = ARG_CONST_MAP_PTR, +++ .arg3_type = ARG_ANYTHING, +++ .arg4_type = ARG_PTR_TO_MEM, +++ .arg5_type = ARG_CONST_SIZE_OR_ZERO, +++}; +++ +++ +++const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; +++EXPORT_SYMBOL_GPL(ipv6_bpf_stub); +++ +++#ifdef CONFIG_XFRM +++BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index, +++ struct bpf_xfrm_state *, to, u32, size, u64, flags) +++{ +++ const struct sec_path *sp = skb_sec_path(skb); +++ const struct xfrm_state *x; +++ +++ if (!sp || unlikely(index >= sp->len || flags)) +++ goto err_clear; +++ +++ x = sp->xvec[index]; +++ +++ if (unlikely(size != sizeof(struct bpf_xfrm_state))) +++ goto err_clear; +++ +++ to->reqid = x->props.reqid; +++ to->spi = x->id.spi; +++ to->family = x->props.family; +++ to->ext = 0; +++ +++ if (to->family == AF_INET6) { +++ memcpy(to->remote_ipv6, x->props.saddr.a6, +++ sizeof(to->remote_ipv6)); +++ } else { +++ to->remote_ipv4 = x->props.saddr.a4; +++ memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3); +++ } ++ ++ return 0; +++err_clear: +++ memset(to, 0, size); +++ return -EINVAL; ++ } ++ ++-const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = { ++- .func = bpf_skb_set_tunnel_key, +++static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = { +++ .func = bpf_skb_get_xfrm_state, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++- .arg2_type = ARG_PTR_TO_STACK, ++- .arg3_type = ARG_CONST_STACK_SIZE, ++- .arg4_type = ARG_ANYTHING, +++ .arg2_type = ARG_ANYTHING, +++ .arg3_type = ARG_PTR_TO_UNINIT_MEM, +++ .arg4_type = ARG_CONST_SIZE, +++ .arg5_type = ARG_ANYTHING, ++ }; +++#endif +++ ++ ++-static const struct bpf_func_proto *bpf_get_skb_set_tunnel_key_proto(void) +++#if IS_ENABLED(CONFIG_LWTUNNEL_BPF) +++static int bpf_push_ip_encap(struct sk_buff *skb, void *hdr, 
u32 len, +++ bool ingress) ++ { ++- if (!md_dst) { ++- /* race is not possible, since it's called from ++- * verifier that is holding verifier mutex ++- */ ++- md_dst = metadata_dst_alloc_percpu(0, GFP_KERNEL); ++- if (!md_dst) ++- return NULL; +++ return bpf_lwt_push_ip_encap(skb, hdr, len, ingress); +++} +++#endif +++ +++BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff *, skb, u32, type, void *, hdr, +++ u32, len) +++{ +++ switch (type) { +++#if IS_ENABLED(CONFIG_LWTUNNEL_BPF) +++ case BPF_LWT_ENCAP_IP: +++ return bpf_push_ip_encap(skb, hdr, len, true /* ingress */); +++#endif +++ default: +++ return -EINVAL; ++ } ++- return &bpf_skb_set_tunnel_key_proto; +++} +++ +++BPF_CALL_4(bpf_lwt_xmit_push_encap, struct sk_buff *, skb, u32, type, +++ void *, hdr, u32, len) +++{ +++ switch (type) { +++#if IS_ENABLED(CONFIG_LWTUNNEL_BPF) +++ case BPF_LWT_ENCAP_IP: +++ return bpf_push_ip_encap(skb, hdr, len, false /* egress */); +++#endif +++ default: +++ return -EINVAL; +++ } +++} +++ +++static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = { +++ .func = bpf_lwt_in_push_encap, +++ .gpl_only = false, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_PTR_TO_CTX, +++ .arg2_type = ARG_ANYTHING, +++ .arg3_type = ARG_PTR_TO_MEM, +++ .arg4_type = ARG_CONST_SIZE +++}; +++ +++static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = { +++ .func = bpf_lwt_xmit_push_encap, +++ .gpl_only = false, +++ .ret_type = RET_INTEGER, +++ .arg1_type = ARG_PTR_TO_CTX, +++ .arg2_type = ARG_ANYTHING, +++ .arg3_type = ARG_PTR_TO_MEM, +++ .arg4_type = ARG_CONST_SIZE +++}; +++ +++bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, +++ struct bpf_insn_access_aux *info) +++{ +++ return false; +++} +++ +++u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, +++ const struct bpf_insn *si, +++ struct bpf_insn *insn_buf, +++ struct bpf_prog *prog, u32 *target_size) +++{ +++ return 0; +++} +++ +++BPF_CALL_1(bpf_tcp_sock, struct sock *, sk) +++{ +++ if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) +++ return (unsigned long)sk; +++ +++ return (unsigned long)NULL; +++} +++ +++const struct bpf_func_proto bpf_tcp_sock_proto = { +++ .func = bpf_tcp_sock, +++ .gpl_only = false, +++ .ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL, +++ .arg1_type = ARG_PTR_TO_SOCK_COMMON, +++}; +++ +++bool bpf_helper_changes_pkt_data(void *func) +++{ +++ if (func == bpf_skb_vlan_push || +++ func == bpf_skb_vlan_pop || +++ func == bpf_skb_store_bytes || +++ func == bpf_skb_change_head || +++ func == bpf_skb_change_tail || +++ func == bpf_skb_pull_data || +++ func == bpf_clone_redirect || +++ func == bpf_l3_csum_replace || +++ func == bpf_l4_csum_replace || +++ func == bpf_lwt_in_push_encap || +++ func == bpf_lwt_xmit_push_encap) +++ return true; +++ +++ return false; ++ } ++ ++ static const struct bpf_func_proto * ++-sk_filter_func_proto(enum bpf_func_id func_id) +++bpf_base_func_proto(enum bpf_func_id func_id) ++ { ++ switch (func_id) { ++ case BPF_FUNC_map_lookup_elem: ++@@ -1644,283 +2495,1168 @@ sk_filter_func_proto(enum bpf_func_id fu ++ return &bpf_map_update_elem_proto; ++ case BPF_FUNC_map_delete_elem: ++ return &bpf_map_delete_elem_proto; +++ case BPF_FUNC_map_push_elem: +++ return &bpf_map_push_elem_proto; +++ case BPF_FUNC_map_pop_elem: +++ return &bpf_map_pop_elem_proto; +++ case BPF_FUNC_map_peek_elem: +++ return &bpf_map_peek_elem_proto; ++ case BPF_FUNC_get_prandom_u32: ++ return &bpf_get_prandom_u32_proto; ++ case BPF_FUNC_get_smp_processor_id: ++- return &bpf_get_smp_processor_id_proto; 
+++ return &bpf_get_raw_smp_processor_id_proto; +++ case BPF_FUNC_get_numa_node_id: +++ return &bpf_get_numa_node_id_proto; ++ case BPF_FUNC_tail_call: ++ return &bpf_tail_call_proto; ++ case BPF_FUNC_ktime_get_ns: ++ return &bpf_ktime_get_ns_proto; +++ default: +++ break; +++ } +++ +++ if (!capable(CAP_SYS_ADMIN)) +++ return NULL; +++ +++ switch (func_id) { +++ case BPF_FUNC_spin_lock: +++ return &bpf_spin_lock_proto; +++ case BPF_FUNC_spin_unlock: +++ return &bpf_spin_unlock_proto; ++ case BPF_FUNC_trace_printk: ++- if (capable(CAP_SYS_ADMIN)) ++- return bpf_get_trace_printk_proto(); +++ return bpf_get_trace_printk_proto(); ++ default: ++ return NULL; ++ } ++ } ++ ++ static const struct bpf_func_proto * ++-tc_cls_act_func_proto(enum bpf_func_id func_id) +++sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +++{ +++ return bpf_base_func_proto(func_id); +++} +++ +++static const struct bpf_func_proto * +++sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +++{ +++ return bpf_base_func_proto(func_id); +++} +++ +++const struct bpf_func_proto bpf_sk_storage_get_proto __weak; +++const struct bpf_func_proto bpf_sk_storage_delete_proto __weak; +++ +++static const struct bpf_func_proto * +++tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ++ { ++ switch (func_id) { ++ case BPF_FUNC_skb_store_bytes: ++ return &bpf_skb_store_bytes_proto; +++ case BPF_FUNC_skb_load_bytes: +++ return &bpf_skb_load_bytes_proto; +++ case BPF_FUNC_skb_load_bytes_relative: +++ return &bpf_skb_load_bytes_relative_proto; +++ case BPF_FUNC_skb_pull_data: +++ return &bpf_skb_pull_data_proto; +++ case BPF_FUNC_csum_diff: +++ return &bpf_csum_diff_proto; +++ case BPF_FUNC_csum_update: +++ return &bpf_csum_update_proto; ++ case BPF_FUNC_l3_csum_replace: ++ return &bpf_l3_csum_replace_proto; ++ case BPF_FUNC_l4_csum_replace: ++ return &bpf_l4_csum_replace_proto; ++ case BPF_FUNC_clone_redirect: ++ return &bpf_clone_redirect_proto; ++- case BPF_FUNC_get_cgroup_classid: ++- return &bpf_get_cgroup_classid_proto; ++ case BPF_FUNC_skb_vlan_push: ++ return &bpf_skb_vlan_push_proto; ++ case BPF_FUNC_skb_vlan_pop: ++ return &bpf_skb_vlan_pop_proto; ++- case BPF_FUNC_skb_get_tunnel_key: ++- return &bpf_skb_get_tunnel_key_proto; ++- case BPF_FUNC_skb_set_tunnel_key: ++- return bpf_get_skb_set_tunnel_key_proto(); +++ case BPF_FUNC_skb_change_type: +++ return &bpf_skb_change_type_proto; +++ case BPF_FUNC_skb_change_tail: +++ return &bpf_skb_change_tail_proto; +++ case BPF_FUNC_redirect: +++ return &bpf_redirect_proto; +++ case BPF_FUNC_get_hash_recalc: +++ return &bpf_get_hash_recalc_proto; +++ case BPF_FUNC_set_hash_invalid: +++ return &bpf_set_hash_invalid_proto; +++ case BPF_FUNC_set_hash: +++ return &bpf_set_hash_proto; +++ case BPF_FUNC_perf_event_output: +++ return &bpf_skb_event_output_proto; +++ case BPF_FUNC_get_smp_processor_id: +++ return &bpf_get_smp_processor_id_proto; +++#ifdef CONFIG_XFRM +++ case BPF_FUNC_skb_get_xfrm_state: +++ return &bpf_skb_get_xfrm_state_proto; +++#endif +++ default: +++ return bpf_base_func_proto(func_id); +++ } +++} +++ +++static const struct bpf_func_proto * +++xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +++{ +++ switch (func_id) { +++ default: +++ return bpf_base_func_proto(func_id); +++ } +++} +++ +++const struct bpf_func_proto bpf_sock_map_update_proto __weak; +++const struct bpf_func_proto bpf_sock_hash_update_proto __weak; +++ +++static const struct bpf_func_proto * 
+++sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +++{ +++ switch (func_id) { +++ default: +++ return bpf_base_func_proto(func_id); +++ } +++} +++ +++const struct bpf_func_proto bpf_msg_redirect_map_proto __weak; +++const struct bpf_func_proto bpf_msg_redirect_hash_proto __weak; +++ +++static const struct bpf_func_proto * +++sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +++{ +++ switch (func_id) { +++ default: +++ return bpf_base_func_proto(func_id); +++ } +++} +++ +++const struct bpf_func_proto bpf_sk_redirect_map_proto __weak; +++const struct bpf_func_proto bpf_sk_redirect_hash_proto __weak; +++ +++static const struct bpf_func_proto * +++sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +++{ +++ return bpf_base_func_proto(func_id); +++} +++ +++static const struct bpf_func_proto * +++flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +++{ +++ switch (func_id) { +++ default: +++ return bpf_base_func_proto(func_id); +++ } +++} +++ +++static const struct bpf_func_proto * +++lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +++{ +++ switch (func_id) { +++ case BPF_FUNC_skb_load_bytes: +++ return &bpf_skb_load_bytes_proto; +++ case BPF_FUNC_skb_pull_data: +++ return &bpf_skb_pull_data_proto; +++ case BPF_FUNC_csum_diff: +++ return &bpf_csum_diff_proto; +++ case BPF_FUNC_get_hash_recalc: +++ return &bpf_get_hash_recalc_proto; +++ case BPF_FUNC_perf_event_output: +++ return &bpf_skb_event_output_proto; +++ case BPF_FUNC_get_smp_processor_id: +++ return &bpf_get_smp_processor_id_proto; +++ default: +++ return bpf_base_func_proto(func_id); +++ } +++} +++ +++static const struct bpf_func_proto * +++lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +++{ +++ switch (func_id) { +++ case BPF_FUNC_lwt_push_encap: +++ return &bpf_lwt_in_push_encap_proto; +++ default: +++ return lwt_out_func_proto(func_id, prog); +++ } +++} +++ +++static const struct bpf_func_proto * +++lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +++{ +++ switch (func_id) { ++ case BPF_FUNC_redirect: ++ return &bpf_redirect_proto; ++- case BPF_FUNC_get_route_realm: ++- return &bpf_get_route_realm_proto; +++ case BPF_FUNC_clone_redirect: +++ return &bpf_clone_redirect_proto; +++ case BPF_FUNC_skb_change_tail: +++ return &bpf_skb_change_tail_proto; +++ case BPF_FUNC_skb_change_head: +++ return &bpf_skb_change_head_proto; +++ case BPF_FUNC_skb_store_bytes: +++ return &bpf_skb_store_bytes_proto; +++ case BPF_FUNC_csum_update: +++ return &bpf_csum_update_proto; +++ case BPF_FUNC_l3_csum_replace: +++ return &bpf_l3_csum_replace_proto; +++ case BPF_FUNC_l4_csum_replace: +++ return &bpf_l4_csum_replace_proto; +++ case BPF_FUNC_set_hash_invalid: +++ return &bpf_set_hash_invalid_proto; +++ case BPF_FUNC_lwt_push_encap: +++ return &bpf_lwt_xmit_push_encap_proto; ++ default: ++- return sk_filter_func_proto(func_id); +++ return lwt_out_func_proto(func_id, prog); ++ } ++ } ++ ++-static bool __is_valid_access(int off, int size, enum bpf_access_type type) +++static const struct bpf_func_proto * +++lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ++ { ++- /* check bounds */ +++ switch (func_id) { +++ default: +++ return lwt_out_func_proto(func_id, prog); +++ } +++} +++ +++static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type, +++ const struct bpf_prog *prog, +++ struct bpf_insn_access_aux *info) +++{ +++ const int 
size_default = sizeof(__u32); +++ ++ if (off < 0 || off >= sizeof(struct __sk_buff)) ++ return false; ++ ++- /* disallow misaligned access */ +++ /* The verifier guarantees that size > 0. */ ++ if (off % size != 0) ++ return false; ++ ++- /* all __sk_buff fields are __u32 */ ++- if (size != 4) +++ switch (off) { +++ case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): +++ if (off + size > offsetofend(struct __sk_buff, cb[4])) +++ return false; +++ break; +++ case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]): +++ case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]): +++ case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4): +++ case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4): +++ case bpf_ctx_range(struct __sk_buff, data): +++ case bpf_ctx_range(struct __sk_buff, data_meta): +++ case bpf_ctx_range(struct __sk_buff, data_end): +++ if (size != size_default) +++ return false; +++ break; +++ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): ++ return false; +++ case bpf_ctx_range(struct __sk_buff, tstamp): +++ if (size != sizeof(__u64)) +++ return false; +++ break; +++ case offsetof(struct __sk_buff, sk): +++ if (type == BPF_WRITE || size != sizeof(__u64)) +++ return false; +++ info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL; +++ break; +++ default: +++ /* Only narrow read access allowed for now. */ +++ if (type == BPF_WRITE) { +++ if (size != size_default) +++ return false; +++ } else { +++ bpf_ctx_record_field_size(info, size_default); +++ if (!bpf_ctx_narrow_access_ok(off, size, size_default)) +++ return false; +++ } +++ } ++ ++ return true; ++ } ++ ++ static bool sk_filter_is_valid_access(int off, int size, ++- enum bpf_access_type type) +++ enum bpf_access_type type, +++ const struct bpf_prog *prog, +++ struct bpf_insn_access_aux *info) ++ { ++- if (off == offsetof(struct __sk_buff, tc_classid)) +++ return false; +++} +++ +++static bool lwt_is_valid_access(int off, int size, +++ enum bpf_access_type type, +++ const struct bpf_prog *prog, +++ struct bpf_insn_access_aux *info) +++{ +++ switch (off) { +++ case bpf_ctx_range(struct __sk_buff, tc_classid): +++ case bpf_ctx_range_till(struct __sk_buff, family, local_port): +++ case bpf_ctx_range(struct __sk_buff, data_meta): +++ case bpf_ctx_range(struct __sk_buff, tstamp): +++ case bpf_ctx_range(struct __sk_buff, wire_len): ++ return false; +++ } ++ ++ if (type == BPF_WRITE) { ++ switch (off) { ++- case offsetof(struct __sk_buff, cb[0]) ... 
++- offsetof(struct __sk_buff, cb[4]): +++ case bpf_ctx_range(struct __sk_buff, mark): +++ case bpf_ctx_range(struct __sk_buff, priority): +++ case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): ++ break; ++ default: ++ return false; ++ } ++ } ++ ++- return __is_valid_access(off, size, type); +++ switch (off) { +++ case bpf_ctx_range(struct __sk_buff, data): +++ info->reg_type = PTR_TO_PACKET; +++ break; +++ case bpf_ctx_range(struct __sk_buff, data_end): +++ info->reg_type = PTR_TO_PACKET_END; +++ break; +++ } +++ +++ return bpf_skb_is_valid_access(off, size, type, prog, info); ++ } ++ ++-static bool tc_cls_act_is_valid_access(int off, int size, ++- enum bpf_access_type type) +++ +++bool bpf_sock_common_is_valid_access(int off, int size, +++ enum bpf_access_type type, +++ struct bpf_insn_access_aux *info) +++{ +++ switch (off) { +++ case bpf_ctx_range_till(struct bpf_sock, type, priority): +++ return false; +++ default: +++ return bpf_sock_is_valid_access(off, size, type, info); +++ } +++} +++ +++bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, +++ struct bpf_insn_access_aux *info) +++{ +++ return false; +++} +++ +++static bool sock_filter_is_valid_access(int off, int size, +++ enum bpf_access_type type, +++ const struct bpf_prog *prog, +++ struct bpf_insn_access_aux *info) +++{ +++ return false; +++} +++ +++static int bpf_noop_prologue(struct bpf_insn *insn_buf, bool direct_write, +++ const struct bpf_prog *prog) +++{ +++ /* Neither direct read nor direct write requires any preliminary +++ * action. +++ */ +++ return 0; +++} +++ +++static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write, +++ const struct bpf_prog *prog, int drop_verdict) +++{ +++ struct bpf_insn *insn = insn_buf; +++ +++ if (!direct_write) +++ return 0; +++ +++ /* if (!skb->cloned) +++ * goto start; +++ * +++ * (Fast-path, otherwise approximation that we might be +++ * a clone, do the rest in helper.) +++ */ +++ *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET()); +++ *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK); +++ *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7); +++ +++ /* ret = bpf_skb_pull_data(skb, 0); */ +++ *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); +++ *insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2); +++ *insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, +++ BPF_FUNC_skb_pull_data); +++ /* if (!ret) +++ * goto restore; +++ * return TC_ACT_SHOT; +++ */ +++ *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2); +++ *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict); +++ *insn++ = BPF_EXIT_INSN(); +++ +++ /* restore: */ +++ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); +++ /* start: */ +++ *insn++ = prog->insnsi[0]; +++ +++ return insn - insn_buf; +++} +++ +++static int bpf_gen_ld_abs(const struct bpf_insn *orig, +++ struct bpf_insn *insn_buf) +++{ +++ bool indirect = BPF_MODE(orig->code) == BPF_IND; +++ struct bpf_insn *insn = insn_buf; +++ +++ if (!indirect) { +++ *insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm); +++ } else { +++ *insn++ = BPF_MOV64_REG(BPF_REG_2, orig->src_reg); +++ if (orig->imm) +++ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm); +++ } +++ /* We're guaranteed here that CTX is in R6. 
*/ +++ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX); +++ +++ switch (BPF_SIZE(orig->code)) { +++ case BPF_B: +++ *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8_no_cache); +++ break; +++ case BPF_H: +++ *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16_no_cache); +++ break; +++ case BPF_W: +++ *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32_no_cache); +++ break; +++ } +++ +++ *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 2); +++ *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0); +++ *insn++ = BPF_EXIT_INSN(); +++ +++ return insn - insn_buf; +++} +++ +++static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, +++ const struct bpf_prog *prog) ++ { ++- if (off == offsetof(struct __sk_buff, tc_classid)) ++- return type == BPF_WRITE ? true : false; +++ return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT); +++} ++ +++static bool tc_cls_act_is_valid_access(int off, int size, +++ enum bpf_access_type type, +++ const struct bpf_prog *prog, +++ struct bpf_insn_access_aux *info) +++{ ++ if (type == BPF_WRITE) { ++ switch (off) { ++- case offsetof(struct __sk_buff, mark): ++- case offsetof(struct __sk_buff, tc_index): ++- case offsetof(struct __sk_buff, priority): ++- case offsetof(struct __sk_buff, cb[0]) ... ++- offsetof(struct __sk_buff, cb[4]): +++ case bpf_ctx_range(struct __sk_buff, mark): +++ case bpf_ctx_range(struct __sk_buff, tc_index): +++ case bpf_ctx_range(struct __sk_buff, priority): +++ case bpf_ctx_range(struct __sk_buff, tc_classid): +++ case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): +++ case bpf_ctx_range(struct __sk_buff, tstamp): +++ case bpf_ctx_range(struct __sk_buff, queue_mapping): ++ break; ++ default: ++ return false; ++ } ++ } ++- return __is_valid_access(off, size, type); +++ +++ switch (off) { +++ case bpf_ctx_range(struct __sk_buff, data): +++ info->reg_type = PTR_TO_PACKET; +++ break; +++ case bpf_ctx_range(struct __sk_buff, data_meta): +++ info->reg_type = PTR_TO_PACKET_META; +++ break; +++ case bpf_ctx_range(struct __sk_buff, data_end): +++ info->reg_type = PTR_TO_PACKET_END; +++ break; +++ case bpf_ctx_range_till(struct __sk_buff, family, local_port): +++ return false; +++ } +++ +++ return bpf_skb_is_valid_access(off, size, type, prog, info); +++} +++ +++static bool xdp_is_valid_access(int off, int size, +++ enum bpf_access_type type, +++ const struct bpf_prog *prog, +++ struct bpf_insn_access_aux *info) +++{ +++ return false; +++} +++ +++void bpf_warn_invalid_xdp_action(u32 act) +++{ +++} +++EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); +++ +++static bool sock_addr_is_valid_access(int off, int size, +++ enum bpf_access_type type, +++ const struct bpf_prog *prog, +++ struct bpf_insn_access_aux *info) +++{ +++ return false; +++} +++ +++static bool sock_ops_is_valid_access(int off, int size, +++ enum bpf_access_type type, +++ const struct bpf_prog *prog, +++ struct bpf_insn_access_aux *info) +++{ +++ return false; +++} +++ +++static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write, +++ const struct bpf_prog *prog) +++{ +++ return bpf_unclone_prologue(insn_buf, direct_write, prog, SK_DROP); +++} +++ +++static bool sk_skb_is_valid_access(int off, int size, +++ enum bpf_access_type type, +++ const struct bpf_prog *prog, +++ struct bpf_insn_access_aux *info) +++{ +++ return false; +++} +++ +++static bool sk_msg_is_valid_access(int off, int size, +++ enum bpf_access_type type, +++ const struct bpf_prog *prog, +++ struct bpf_insn_access_aux *info) +++{ +++ return false; +++} +++ +++static bool 
flow_dissector_is_valid_access(int off, int size, +++ enum bpf_access_type type, +++ const struct bpf_prog *prog, +++ struct bpf_insn_access_aux *info) +++{ +++ return false; ++ } ++ ++-static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, ++- int src_reg, int ctx_off, ++- struct bpf_insn *insn_buf, ++- struct bpf_prog *prog) +++static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type, +++ const struct bpf_insn *si, +++ struct bpf_insn *insn_buf, +++ struct bpf_prog *prog, +++ u32 *target_size) +++ +++{ +++ return 0; +++} +++ +++static u32 bpf_convert_ctx_access(enum bpf_access_type type, +++ const struct bpf_insn *si, +++ struct bpf_insn *insn_buf, +++ struct bpf_prog *prog, u32 *target_size) ++ { ++ struct bpf_insn *insn = insn_buf; +++ int off; ++ ++- switch (ctx_off) { +++ switch (si->off) { ++ case offsetof(struct __sk_buff, len): ++- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); ++- ++- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ++- offsetof(struct sk_buff, len)); +++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, +++ bpf_target_off(struct sk_buff, len, 4, +++ target_size)); ++ break; ++ ++ case offsetof(struct __sk_buff, protocol): ++- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); ++- ++- *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, ++- offsetof(struct sk_buff, protocol)); +++ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, +++ bpf_target_off(struct sk_buff, protocol, 2, +++ target_size)); ++ break; ++ ++ case offsetof(struct __sk_buff, vlan_proto): ++- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2); ++- ++- *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, ++- offsetof(struct sk_buff, vlan_proto)); +++ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, +++ bpf_target_off(struct sk_buff, vlan_proto, 2, +++ target_size)); ++ break; ++ ++ case offsetof(struct __sk_buff, priority): ++- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4); ++- ++ if (type == BPF_WRITE) ++- *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, ++- offsetof(struct sk_buff, priority)); +++ *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, +++ bpf_target_off(struct sk_buff, priority, 4, +++ target_size)); ++ else ++- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ++- offsetof(struct sk_buff, priority)); +++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, +++ bpf_target_off(struct sk_buff, priority, 4, +++ target_size)); ++ break; ++ ++ case offsetof(struct __sk_buff, ingress_ifindex): ++- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, skb_iif) != 4); ++- ++- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ++- offsetof(struct sk_buff, skb_iif)); +++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, +++ bpf_target_off(struct sk_buff, skb_iif, 4, +++ target_size)); ++ break; ++ ++ case offsetof(struct __sk_buff, ifindex): ++- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); ++- ++- *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)), ++- dst_reg, src_reg, +++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), +++ si->dst_reg, si->src_reg, ++ offsetof(struct sk_buff, dev)); ++- *insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, 0, 1); ++- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, dst_reg, ++- offsetof(struct net_device, ifindex)); +++ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); +++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, +++ bpf_target_off(struct net_device, ifindex, 4, +++ target_size)); ++ break; ++ ++ case offsetof(struct __sk_buff, hash): ++- 
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); ++- ++- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ++- offsetof(struct sk_buff, hash)); +++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, +++ bpf_target_off(struct sk_buff, hash, 4, +++ target_size)); ++ break; ++ ++ case offsetof(struct __sk_buff, mark): ++- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); ++- ++ if (type == BPF_WRITE) ++- *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, ++- offsetof(struct sk_buff, mark)); +++ *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, +++ bpf_target_off(struct sk_buff, mark, 4, +++ target_size)); ++ else ++- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ++- offsetof(struct sk_buff, mark)); +++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, +++ bpf_target_off(struct sk_buff, mark, 4, +++ target_size)); ++ break; ++ ++ case offsetof(struct __sk_buff, pkt_type): ++- return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn); +++ *target_size = 1; +++ *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg, +++ PKT_TYPE_OFFSET()); +++ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX); +++#ifdef __BIG_ENDIAN_BITFIELD +++ *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5); +++#endif +++ break; ++ ++ case offsetof(struct __sk_buff, queue_mapping): ++- return convert_skb_access(SKF_AD_QUEUE, dst_reg, src_reg, insn); +++ if (type == BPF_WRITE) { +++ *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, USHRT_MAX, 1); +++ *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg, +++ bpf_target_off(struct sk_buff, +++ queue_mapping, +++ 2, target_size)); +++ } else { +++ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, +++ bpf_target_off(struct sk_buff, +++ queue_mapping, +++ 2, target_size)); +++ } +++ break; ++ ++ case offsetof(struct __sk_buff, vlan_present): ++- return convert_skb_access(SKF_AD_VLAN_TAG_PRESENT, ++- dst_reg, src_reg, insn); +++ *target_size = 1; +++ *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg, +++ PKT_VLAN_PRESENT_OFFSET()); +++ if (PKT_VLAN_PRESENT_BIT) +++ *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, PKT_VLAN_PRESENT_BIT); +++ if (PKT_VLAN_PRESENT_BIT < 7) +++ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1); +++ break; ++ ++ case offsetof(struct __sk_buff, vlan_tci): ++- return convert_skb_access(SKF_AD_VLAN_TAG, ++- dst_reg, src_reg, insn); +++ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, +++ bpf_target_off(struct sk_buff, vlan_tci, 2, +++ target_size)); +++ break; ++ ++ case offsetof(struct __sk_buff, cb[0]) ... 
++- offsetof(struct __sk_buff, cb[4]): +++ offsetofend(struct __sk_buff, cb[4]) - 1: ++ BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20); +++ BUILD_BUG_ON((offsetof(struct sk_buff, cb) + +++ offsetof(struct qdisc_skb_cb, data)) % +++ sizeof(__u64)); ++ ++ prog->cb_access = 1; ++- ctx_off -= offsetof(struct __sk_buff, cb[0]); ++- ctx_off += offsetof(struct sk_buff, cb); ++- ctx_off += offsetof(struct qdisc_skb_cb, data); +++ off = si->off; +++ off -= offsetof(struct __sk_buff, cb[0]); +++ off += offsetof(struct sk_buff, cb); +++ off += offsetof(struct qdisc_skb_cb, data); ++ if (type == BPF_WRITE) ++- *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, ctx_off); +++ *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg, +++ si->src_reg, off); ++ else ++- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off); +++ *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg, +++ si->src_reg, off); ++ break; ++ ++ case offsetof(struct __sk_buff, tc_classid): ++- ctx_off -= offsetof(struct __sk_buff, tc_classid); ++- ctx_off += offsetof(struct sk_buff, cb); ++- ctx_off += offsetof(struct qdisc_skb_cb, tc_classid); ++- WARN_ON(type != BPF_WRITE); ++- *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off); +++ BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, tc_classid) != 2); +++ +++ off = si->off; +++ off -= offsetof(struct __sk_buff, tc_classid); +++ off += offsetof(struct sk_buff, cb); +++ off += offsetof(struct qdisc_skb_cb, tc_classid); +++ *target_size = 2; +++ if (type == BPF_WRITE) +++ *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, +++ si->src_reg, off); +++ else +++ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, +++ si->src_reg, off); +++ break; +++ +++ case offsetof(struct __sk_buff, data): +++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), +++ si->dst_reg, si->src_reg, +++ offsetof(struct sk_buff, data)); +++ break; +++ +++ case offsetof(struct __sk_buff, data_meta): +++ off = si->off; +++ off -= offsetof(struct __sk_buff, data_meta); +++ off += offsetof(struct sk_buff, cb); +++ off += offsetof(struct bpf_skb_data_end, data_meta); +++ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, +++ si->src_reg, off); +++ break; +++ +++ case offsetof(struct __sk_buff, data_end): +++ off = si->off; +++ off -= offsetof(struct __sk_buff, data_end); +++ off += offsetof(struct sk_buff, cb); +++ off += offsetof(struct bpf_skb_data_end, data_end); +++ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, +++ si->src_reg, off); ++ break; ++ ++ case offsetof(struct __sk_buff, tc_index): ++ #ifdef CONFIG_NET_SCHED ++- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2); ++- ++ if (type == BPF_WRITE) ++- *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ++- offsetof(struct sk_buff, tc_index)); +++ *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg, +++ bpf_target_off(struct sk_buff, tc_index, 2, +++ target_size)); ++ else ++- *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, ++- offsetof(struct sk_buff, tc_index)); ++- break; +++ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, +++ bpf_target_off(struct sk_buff, tc_index, 2, +++ target_size)); ++ #else +++ *target_size = 2; ++ if (type == BPF_WRITE) ++- *insn++ = BPF_MOV64_REG(dst_reg, dst_reg); +++ *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg); ++ else ++- *insn++ = BPF_MOV64_IMM(dst_reg, 0); +++ *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); +++#endif +++ break; +++ +++ case offsetof(struct __sk_buff, napi_id): +++ *target_size = 4; +++ *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); +++ break; +++ case offsetof(struct __sk_buff, family): +++ 
BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2); +++ +++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), +++ si->dst_reg, si->src_reg, +++ offsetof(struct sk_buff, sk)); +++ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, +++ bpf_target_off(struct sock_common, +++ skc_family, +++ 2, target_size)); +++ break; +++ case offsetof(struct __sk_buff, remote_ip4): +++ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4); +++ +++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), +++ si->dst_reg, si->src_reg, +++ offsetof(struct sk_buff, sk)); +++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, +++ bpf_target_off(struct sock_common, +++ skc_daddr, +++ 4, target_size)); +++ break; +++ case offsetof(struct __sk_buff, local_ip4): +++ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, +++ skc_rcv_saddr) != 4); +++ +++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), +++ si->dst_reg, si->src_reg, +++ offsetof(struct sk_buff, sk)); +++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, +++ bpf_target_off(struct sock_common, +++ skc_rcv_saddr, +++ 4, target_size)); +++ break; +++ case offsetof(struct __sk_buff, remote_ip6[0]) ... +++ offsetof(struct __sk_buff, remote_ip6[3]): +++#if IS_ENABLED(CONFIG_IPV6) +++ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, +++ skc_v6_daddr.s6_addr32[0]) != 4); +++ +++ off = si->off; +++ off -= offsetof(struct __sk_buff, remote_ip6[0]); +++ +++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), +++ si->dst_reg, si->src_reg, +++ offsetof(struct sk_buff, sk)); +++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, +++ offsetof(struct sock_common, +++ skc_v6_daddr.s6_addr32[0]) + +++ off); +++#else +++ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); +++#endif +++ break; +++ case offsetof(struct __sk_buff, local_ip6[0]) ... 
+++ offsetof(struct __sk_buff, local_ip6[3]): +++#if IS_ENABLED(CONFIG_IPV6) +++ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, +++ skc_v6_rcv_saddr.s6_addr32[0]) != 4); +++ +++ off = si->off; +++ off -= offsetof(struct __sk_buff, local_ip6[0]); +++ +++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), +++ si->dst_reg, si->src_reg, +++ offsetof(struct sk_buff, sk)); +++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, +++ offsetof(struct sock_common, +++ skc_v6_rcv_saddr.s6_addr32[0]) + +++ off); +++#else +++ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); +++#endif +++ break; +++ +++ case offsetof(struct __sk_buff, remote_port): +++ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2); +++ +++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), +++ si->dst_reg, si->src_reg, +++ offsetof(struct sk_buff, sk)); +++ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, +++ bpf_target_off(struct sock_common, +++ skc_dport, +++ 2, target_size)); +++#ifndef __BIG_ENDIAN_BITFIELD +++ *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16); +++#endif ++ break; +++ +++ case offsetof(struct __sk_buff, local_port): +++ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2); +++ +++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), +++ si->dst_reg, si->src_reg, +++ offsetof(struct sk_buff, sk)); +++ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, +++ bpf_target_off(struct sock_common, +++ skc_num, 2, target_size)); +++ break; +++ +++ case offsetof(struct __sk_buff, tstamp): +++ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8); +++ +++ if (type == BPF_WRITE) +++ *insn++ = BPF_STX_MEM(BPF_DW, +++ si->dst_reg, si->src_reg, +++ bpf_target_off(struct sk_buff, +++ tstamp, 8, +++ target_size)); +++ else +++ *insn++ = BPF_LDX_MEM(BPF_DW, +++ si->dst_reg, si->src_reg, +++ bpf_target_off(struct sk_buff, +++ tstamp, 8, +++ target_size)); +++ break; +++ +++ case offsetof(struct __sk_buff, gso_segs): +++ /* si->dst_reg = skb_shinfo(SKB); */ +++#ifdef NET_SKBUFF_DATA_USES_OFFSET +++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), +++ BPF_REG_AX, si->src_reg, +++ offsetof(struct sk_buff, end)); +++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head), +++ si->dst_reg, si->src_reg, +++ offsetof(struct sk_buff, head)); +++ *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX); +++#else +++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), +++ si->dst_reg, si->src_reg, +++ offsetof(struct sk_buff, end)); ++ #endif +++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs), +++ si->dst_reg, si->dst_reg, +++ bpf_target_off(struct skb_shared_info, +++ gso_segs, 2, +++ target_size)); +++ break; +++ case offsetof(struct __sk_buff, wire_len): +++ BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4); +++ +++ off = si->off; +++ off -= offsetof(struct __sk_buff, wire_len); +++ off += offsetof(struct sk_buff, cb); +++ off += offsetof(struct qdisc_skb_cb, pkt_len); +++ *target_size = 4; +++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off); +++ break; +++ +++ case offsetof(struct __sk_buff, sk): +++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), +++ si->dst_reg, si->src_reg, +++ offsetof(struct sk_buff, sk)); +++ break; ++ } ++ ++ return insn - insn_buf; ++ } ++ ++-static const struct bpf_verifier_ops sk_filter_ops = { ++- .get_func_proto = sk_filter_func_proto, ++- .is_valid_access = sk_filter_is_valid_access, ++- .convert_ctx_access = bpf_net_convert_ctx_access, +++u32 
bpf_sock_convert_ctx_access(enum bpf_access_type type, +++ const struct bpf_insn *si, +++ struct bpf_insn *insn_buf, +++ struct bpf_prog *prog, u32 *target_size) +++{ +++ return 0; +++} +++ +++static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, +++ const struct bpf_insn *si, +++ struct bpf_insn *insn_buf, +++ struct bpf_prog *prog, u32 *target_size) +++{ +++ struct bpf_insn *insn = insn_buf; +++ +++ switch (si->off) { +++ case offsetof(struct __sk_buff, ifindex): +++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), +++ si->dst_reg, si->src_reg, +++ offsetof(struct sk_buff, dev)); +++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, +++ bpf_target_off(struct net_device, ifindex, 4, +++ target_size)); +++ break; +++ default: +++ return bpf_convert_ctx_access(type, si, insn_buf, prog, +++ target_size); +++ } +++ +++ return insn - insn_buf; +++} +++ +++static u32 xdp_convert_ctx_access(enum bpf_access_type type, +++ const struct bpf_insn *si, +++ struct bpf_insn *insn_buf, +++ struct bpf_prog *prog, u32 *target_size) +++{ +++ return 0; +++} +++ +++/* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is type of +++ * context Structure, F is Field in context structure that contains a pointer +++ * to Nested Structure of type NS that has the field NF. +++ * +++ * SIZE encodes the load size (BPF_B, BPF_H, etc). It's up to caller to make +++ * sure that SIZE is not greater than actual size of S.F.NF. +++ * +++ * If offset OFF is provided, the load happens from that offset relative to +++ * offset of NF. +++ */ +++#define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF) \ +++ do { \ +++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg, \ +++ si->src_reg, offsetof(S, F)); \ +++ *insn++ = BPF_LDX_MEM( \ +++ SIZE, si->dst_reg, si->dst_reg, \ +++ bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \ +++ target_size) \ +++ + OFF); \ +++ } while (0) +++ +++#define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF) \ +++ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, \ +++ BPF_FIELD_SIZEOF(NS, NF), 0) +++ +++/* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to +++ * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation. +++ * +++ * In addition it uses Temporary Field TF (member of struct S) as the 3rd +++ * "register" since two registers available in convert_ctx_access are not +++ * enough: we can't override neither SRC, since it contains value to store, nor +++ * DST since it contains pointer to context that may be used by later +++ * instructions. But we need a temporary place to save pointer to nested +++ * structure whose field we want to store to. 
+++ */ +++#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, OFF, TF) \ +++ do { \ +++ int tmp_reg = BPF_REG_9; \ +++ if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \ +++ --tmp_reg; \ +++ if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \ +++ --tmp_reg; \ +++ *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg, \ +++ offsetof(S, TF)); \ +++ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \ +++ si->dst_reg, offsetof(S, F)); \ +++ *insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \ +++ bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \ +++ target_size) \ +++ + OFF); \ +++ *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \ +++ offsetof(S, TF)); \ +++ } while (0) +++ +++#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \ +++ TF) \ +++ do { \ +++ if (type == BPF_WRITE) { \ +++ SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, \ +++ OFF, TF); \ +++ } else { \ +++ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \ +++ S, NS, F, NF, SIZE, OFF); \ +++ } \ +++ } while (0) +++ +++#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF) \ +++ SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( \ +++ S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF) +++ +++static u32 sock_addr_convert_ctx_access(enum bpf_access_type type, +++ const struct bpf_insn *si, +++ struct bpf_insn *insn_buf, +++ struct bpf_prog *prog, u32 *target_size) +++{ +++ return 0; +++} +++ +++static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, +++ const struct bpf_insn *si, +++ struct bpf_insn *insn_buf, +++ struct bpf_prog *prog, +++ u32 *target_size) +++{ +++ return 0; +++} +++ +++static u32 sk_skb_convert_ctx_access(enum bpf_access_type type, +++ const struct bpf_insn *si, +++ struct bpf_insn *insn_buf, +++ struct bpf_prog *prog, u32 *target_size) +++{ +++ return 0; +++} +++ +++static u32 sk_msg_convert_ctx_access(enum bpf_access_type type, +++ const struct bpf_insn *si, +++ struct bpf_insn *insn_buf, +++ struct bpf_prog *prog, u32 *target_size) +++{ +++ return 0; +++} +++ +++const struct bpf_verifier_ops sk_filter_verifier_ops = { +++ .is_valid_access = sk_filter_is_valid_access, +++ .convert_ctx_access = bpf_convert_ctx_access, +++ .gen_ld_abs = bpf_gen_ld_abs, ++ }; ++ ++-static const struct bpf_verifier_ops tc_cls_act_ops = { ++- .get_func_proto = tc_cls_act_func_proto, ++- .is_valid_access = tc_cls_act_is_valid_access, ++- .convert_ctx_access = bpf_net_convert_ctx_access, +++const struct bpf_prog_ops sk_filter_prog_ops = { ++ }; ++ ++-static struct bpf_prog_type_list sk_filter_type __read_mostly = { ++- .ops = &sk_filter_ops, ++- .type = BPF_PROG_TYPE_SOCKET_FILTER, +++const struct bpf_verifier_ops tc_cls_act_verifier_ops = { +++ .get_func_proto = tc_cls_act_func_proto, +++ .is_valid_access = tc_cls_act_is_valid_access, +++ .convert_ctx_access = tc_cls_act_convert_ctx_access, +++ .gen_prologue = tc_cls_act_prologue, +++ .gen_ld_abs = bpf_gen_ld_abs, ++ }; ++ ++-static struct bpf_prog_type_list sched_cls_type __read_mostly = { ++- .ops = &tc_cls_act_ops, ++- .type = BPF_PROG_TYPE_SCHED_CLS, +++const struct bpf_prog_ops tc_cls_act_prog_ops = { ++ }; ++ ++-static struct bpf_prog_type_list sched_act_type __read_mostly = { ++- .ops = &tc_cls_act_ops, ++- .type = BPF_PROG_TYPE_SCHED_ACT, +++const struct bpf_verifier_ops xdp_verifier_ops = { +++ .get_func_proto = xdp_func_proto, +++ .is_valid_access = xdp_is_valid_access, +++ .convert_ctx_access = xdp_convert_ctx_access, +++ .gen_prologue = bpf_noop_prologue, ++ }; ++ ++-static int __init register_sk_filter_ops(void) ++-{ ++- 
bpf_register_prog_type(&sk_filter_type); ++- bpf_register_prog_type(&sched_cls_type); ++- bpf_register_prog_type(&sched_act_type); +++const struct bpf_verifier_ops lwt_in_verifier_ops = { +++ .get_func_proto = lwt_in_func_proto, +++ .is_valid_access = lwt_is_valid_access, +++ .convert_ctx_access = bpf_convert_ctx_access, +++}; ++ ++- return 0; ++-} ++-late_initcall(register_sk_filter_ops); +++const struct bpf_prog_ops lwt_in_prog_ops = { +++}; +++ +++const struct bpf_verifier_ops lwt_out_verifier_ops = { +++ .get_func_proto = lwt_out_func_proto, +++ .is_valid_access = lwt_is_valid_access, +++ .convert_ctx_access = bpf_convert_ctx_access, +++}; +++ +++const struct bpf_prog_ops lwt_out_prog_ops = { +++}; +++ +++const struct bpf_verifier_ops lwt_xmit_verifier_ops = { +++ .get_func_proto = lwt_xmit_func_proto, +++ .is_valid_access = lwt_is_valid_access, +++ .convert_ctx_access = bpf_convert_ctx_access, +++ .gen_prologue = tc_cls_act_prologue, +++}; +++ +++const struct bpf_prog_ops lwt_xmit_prog_ops = { +++}; +++ +++const struct bpf_verifier_ops lwt_seg6local_verifier_ops = { +++ .get_func_proto = lwt_seg6local_func_proto, +++ .is_valid_access = lwt_is_valid_access, +++ .convert_ctx_access = bpf_convert_ctx_access, +++}; +++ +++const struct bpf_prog_ops lwt_seg6local_prog_ops = { +++}; +++ +++const struct bpf_verifier_ops cg_sock_verifier_ops = { +++ .get_func_proto = sock_filter_func_proto, +++ .is_valid_access = sock_filter_is_valid_access, +++ .convert_ctx_access = bpf_sock_convert_ctx_access, +++}; +++ +++const struct bpf_prog_ops cg_sock_prog_ops = { +++}; +++ +++const struct bpf_verifier_ops cg_sock_addr_verifier_ops = { +++ .get_func_proto = sock_addr_func_proto, +++ .is_valid_access = sock_addr_is_valid_access, +++ .convert_ctx_access = sock_addr_convert_ctx_access, +++}; ++ ++-int __sk_detach_filter(struct sock *sk, bool locked) +++const struct bpf_prog_ops cg_sock_addr_prog_ops = { +++}; +++ +++const struct bpf_verifier_ops sock_ops_verifier_ops = { +++ .get_func_proto = sock_ops_func_proto, +++ .is_valid_access = sock_ops_is_valid_access, +++ .convert_ctx_access = sock_ops_convert_ctx_access, +++}; +++ +++const struct bpf_prog_ops sock_ops_prog_ops = { +++}; +++ +++const struct bpf_verifier_ops sk_skb_verifier_ops = { +++ .get_func_proto = sk_skb_func_proto, +++ .is_valid_access = sk_skb_is_valid_access, +++ .convert_ctx_access = sk_skb_convert_ctx_access, +++ .gen_prologue = sk_skb_prologue, +++}; +++ +++const struct bpf_prog_ops sk_skb_prog_ops = { +++}; +++ +++const struct bpf_verifier_ops sk_msg_verifier_ops = { +++ .get_func_proto = sk_msg_func_proto, +++ .is_valid_access = sk_msg_is_valid_access, +++ .convert_ctx_access = sk_msg_convert_ctx_access, +++ .gen_prologue = bpf_noop_prologue, +++}; +++ +++const struct bpf_prog_ops sk_msg_prog_ops = { +++}; +++ +++const struct bpf_verifier_ops flow_dissector_verifier_ops = { +++ .get_func_proto = flow_dissector_func_proto, +++ .is_valid_access = flow_dissector_is_valid_access, +++ .convert_ctx_access = flow_dissector_convert_ctx_access, +++}; +++ +++const struct bpf_prog_ops flow_dissector_prog_ops = { +++}; +++ +++int sk_detach_filter(struct sock *sk) ++ { ++ int ret = -ENOENT; ++ struct sk_filter *filter; ++@@ -1928,7 +3664,8 @@ int __sk_detach_filter(struct sock *sk, ++ if (sock_flag(sk, SOCK_FILTER_LOCKED)) ++ return -EPERM; ++ ++- filter = rcu_dereference_protected(sk->sk_filter, locked); +++ filter = rcu_dereference_protected(sk->sk_filter, +++ lockdep_sock_is_held(sk)); ++ if (filter) { ++ RCU_INIT_POINTER(sk->sk_filter, NULL); 
++ sk_filter_uncharge(sk, filter); ++@@ -1937,12 +3674,7 @@ int __sk_detach_filter(struct sock *sk, ++ ++ return ret; ++ } ++-EXPORT_SYMBOL_GPL(__sk_detach_filter); ++- ++-int sk_detach_filter(struct sock *sk) ++-{ ++- return __sk_detach_filter(sk, sock_owned_by_user(sk)); ++-} +++EXPORT_SYMBOL_GPL(sk_detach_filter); ++ ++ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, ++ unsigned int len) ++@@ -1953,7 +3685,7 @@ int sk_get_filter(struct sock *sk, struc ++ ++ lock_sock(sk); ++ filter = rcu_dereference_protected(sk->sk_filter, ++- sock_owned_by_user(sk)); +++ lockdep_sock_is_held(sk)); ++ if (!filter) ++ goto out; ++ ++@@ -1987,3 +3719,5 @@ out: ++ release_sock(sk); ++ return ret; ++ } +++ +++ ++--- a/include/asm-generic/barrier.h +++++ b/include/asm-generic/barrier.h ++@@ -119,5 +119,29 @@ do { \ ++ ___p1; \ ++ }) ++ +++/** +++ * smp_cond_load_relaxed() - (Spin) wait for cond with no ordering guarantees +++ * @ptr: pointer to the variable to wait on +++ * @cond: boolean expression to wait for +++ * +++ * Equivalent to using READ_ONCE() on the condition variable. +++ * +++ * Due to C lacking lambda expressions we load the value of *ptr into a +++ * pre-named variable @VAL to be used in @cond. +++ */ +++#ifndef smp_cond_load_relaxed +++#define smp_cond_load_relaxed(ptr, cond_expr) ({ \ +++ typeof(ptr) __PTR = (ptr); \ +++ typeof(*ptr) VAL; \ +++ for (;;) { \ +++ VAL = READ_ONCE(*__PTR); \ +++ if (cond_expr) \ +++ break; \ +++ cpu_relax(); \ +++ } \ +++ VAL; \ +++}) +++#endif +++ ++ #endif /* !__ASSEMBLY__ */ ++ #endif /* __ASM_GENERIC_BARRIER_H */ ++--- a/arch/arm/include/asm/barrier.h +++++ b/arch/arm/include/asm/barrier.h ++@@ -94,4 +94,6 @@ do { \ ++ #define smp_mb__after_atomic() smp_mb() ++ ++ #endif /* !__ASSEMBLY__ */ +++ +++#include ++ #endif /* __ASM_BARRIER_H */ ++--- a/include/linux/list_nulls.h +++++ b/include/linux/list_nulls.h ++@@ -1,3 +1,4 @@ +++/* SPDX-License-Identifier: GPL-2.0 */ ++ #ifndef _LINUX_LIST_NULLS_H ++ #define _LINUX_LIST_NULLS_H ++ ++@@ -29,6 +30,11 @@ struct hlist_nulls_node { ++ ((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls)) ++ ++ #define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member) +++ +++#define hlist_nulls_entry_safe(ptr, type, member) \ +++ ({ typeof(ptr) ____ptr = (ptr); \ +++ !is_a_nulls(____ptr) ? 
hlist_nulls_entry(____ptr, type, member) : NULL; \ +++ }) ++ /** ++ * ptr_is_a_nulls - Test if a ptr is a nulls ++ * @ptr: ptr to be tested ++@@ -57,7 +63,7 @@ static inline int hlist_nulls_unhashed(c ++ ++ static inline int hlist_nulls_empty(const struct hlist_nulls_head *h) ++ { ++- return is_a_nulls(h->first); +++ return is_a_nulls(READ_ONCE(h->first)); ++ } ++ ++ static inline void hlist_nulls_add_head(struct hlist_nulls_node *n, ++@@ -66,10 +72,10 @@ static inline void hlist_nulls_add_head( ++ struct hlist_nulls_node *first = h->first; ++ ++ n->next = first; ++- n->pprev = &h->first; +++ WRITE_ONCE(n->pprev, &h->first); ++ h->first = n; ++ if (!is_a_nulls(first)) ++- first->pprev = &n->next; +++ WRITE_ONCE(first->pprev, &n->next); ++ } ++ ++ static inline void __hlist_nulls_del(struct hlist_nulls_node *n) ++@@ -79,13 +85,13 @@ static inline void __hlist_nulls_del(str ++ ++ WRITE_ONCE(*pprev, next); ++ if (!is_a_nulls(next)) ++- next->pprev = pprev; +++ WRITE_ONCE(next->pprev, pprev); ++ } ++ ++ static inline void hlist_nulls_del(struct hlist_nulls_node *n) ++ { ++ __hlist_nulls_del(n); ++- n->pprev = LIST_POISON2; +++ WRITE_ONCE(n->pprev, LIST_POISON2); ++ } ++ ++ /** ++--- a/include/linux/rculist_nulls.h +++++ b/include/linux/rculist_nulls.h ++@@ -1,3 +1,4 @@ +++/* SPDX-License-Identifier: GPL-2.0 */ ++ #ifndef _LINUX_RCULIST_NULLS_H ++ #define _LINUX_RCULIST_NULLS_H ++ ++@@ -33,7 +34,7 @@ static inline void hlist_nulls_del_init_ ++ { ++ if (!hlist_nulls_unhashed(n)) { ++ __hlist_nulls_del(n); ++- n->pprev = NULL; +++ WRITE_ONCE(n->pprev, NULL); ++ } ++ } ++ ++@@ -65,7 +66,7 @@ static inline void hlist_nulls_del_init_ ++ static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n) ++ { ++ __hlist_nulls_del(n); ++- n->pprev = LIST_POISON2; +++ WRITE_ONCE(n->pprev, LIST_POISON2); ++ } ++ ++ /** ++@@ -93,11 +94,49 @@ static inline void hlist_nulls_add_head_ ++ struct hlist_nulls_node *first = h->first; ++ ++ n->next = first; ++- n->pprev = &h->first; +++ WRITE_ONCE(n->pprev, &h->first); ++ rcu_assign_pointer(hlist_nulls_first_rcu(h), n); ++ if (!is_a_nulls(first)) ++- first->pprev = &n->next; +++ WRITE_ONCE(first->pprev, &n->next); ++ } +++ +++/** +++ * hlist_nulls_add_tail_rcu +++ * @n: the element to add to the hash list. +++ * @h: the list to add to. +++ * +++ * Description: +++ * Adds the specified element to the specified hlist_nulls, +++ * while permitting racing traversals. +++ * +++ * The caller must take whatever precautions are necessary +++ * (such as holding appropriate locks) to avoid racing +++ * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() +++ * or hlist_nulls_del_rcu(), running on this same list. +++ * However, it is perfectly legal to run concurrently with +++ * the _rcu list-traversal primitives, such as +++ * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency +++ * problems on Alpha CPUs. Regardless of the type of CPU, the +++ * list-traversal primitive must be guarded by rcu_read_lock(). +++ */ +++static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, +++ struct hlist_nulls_head *h) +++{ +++ struct hlist_nulls_node *i, *last = NULL; +++ +++ /* Note: write side code, so rcu accessors are not needed. 
*/ +++ for (i = h->first; !is_a_nulls(i); i = i->next) +++ last = i; +++ +++ if (last) { +++ n->next = last->next; +++ n->pprev = &last->next; +++ rcu_assign_pointer(hlist_next_rcu(last), n); +++ } else { +++ hlist_nulls_add_head_rcu(n, h); +++ } +++} +++ ++ /** ++ * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type ++ * @tpos: the type * to use as a loop cursor. ++@@ -107,7 +146,7 @@ static inline void hlist_nulls_add_head_ ++ * ++ * The barrier() is needed to make sure compiler doesn't cache first element [1], ++ * as this loop can be restarted [2] ++- * [1] Documentation/atomic_ops.txt around line 114 +++ * [1] Documentation/core-api/atomic_ops.rst around line 114 ++ * [2] Documentation/RCU/rculist_nulls.txt around line 146 ++ */ ++ #define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ ++@@ -117,5 +156,19 @@ static inline void hlist_nulls_add_head_ ++ ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ ++ pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos))) ++ +++/** +++ * hlist_nulls_for_each_entry_safe - +++ * iterate over list of given type safe against removal of list entry +++ * @tpos: the type * to use as a loop cursor. +++ * @pos: the &struct hlist_nulls_node to use as a loop cursor. +++ * @head: the head for your list. +++ * @member: the name of the hlist_nulls_node within the struct. +++ */ +++#define hlist_nulls_for_each_entry_safe(tpos, pos, head, member) \ +++ for (({barrier();}), \ +++ pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ +++ (!is_a_nulls(pos)) && \ +++ ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); \ +++ pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });) ++ #endif ++ #endif ++--- a/drivers/net/tun.c +++++ b/drivers/net/tun.c ++@@ -627,8 +627,9 @@ static int tun_attach(struct tun_struct ++ ++ /* Re-attach the filter to persist device */ ++ if (!skip_filter && (tun->filter_attached == true)) { ++- err = __sk_attach_filter(&tun->fprog, tfile->socket.sk, ++- lockdep_rtnl_is_held()); +++ lock_sock(tfile->socket.sk); +++ err = sk_attach_filter(&tun->fprog, tfile->socket.sk); +++ release_sock(tfile->socket.sk); ++ if (!err) ++ goto out; ++ } ++@@ -1835,7 +1836,9 @@ static void tun_detach_filter(struct tun ++ ++ for (i = 0; i < n; i++) { ++ tfile = rtnl_dereference(tun->tfiles[i]); ++- __sk_detach_filter(tfile->socket.sk, lockdep_rtnl_is_held()); +++ lock_sock(tfile->socket.sk); +++ sk_detach_filter(tfile->socket.sk); +++ release_sock(tfile->socket.sk); ++ } ++ ++ tun->filter_attached = false; ++@@ -1848,8 +1851,9 @@ static int tun_attach_filter(struct tun_ ++ ++ for (i = 0; i < tun->numqueues; i++) { ++ tfile = rtnl_dereference(tun->tfiles[i]); ++- ret = __sk_attach_filter(&tun->fprog, tfile->socket.sk, ++- lockdep_rtnl_is_held()); +++ lock_sock(tfile->socket.sk); +++ ret = sk_attach_filter(&tun->fprog, tfile->socket.sk); +++ release_sock(tfile->socket.sk); ++ if (ret) { ++ tun_detach_filter(tun, i); ++ return ret; ++--- a/include/linux/list.h +++++ b/include/linux/list.h ++@@ -1,3 +1,4 @@ +++/* SPDX-License-Identifier: GPL-2.0 */ ++ #ifndef _LINUX_LIST_H ++ #define _LINUX_LIST_H ++ ++@@ -24,31 +25,46 @@ ++ ++ static inline void INIT_LIST_HEAD(struct list_head *list) ++ { ++- list->next = list; +++ WRITE_ONCE(list->next, list); ++ list->prev = list; ++ } ++ +++#ifdef CONFIG_DEBUG_LIST +++extern bool __list_add_valid(struct list_head *new, +++ struct list_head *prev, +++ struct list_head *next); +++extern bool __list_del_entry_valid(struct list_head *entry); +++#else +++static inline bool 
__list_add_valid(struct list_head *new, +++ struct list_head *prev, +++ struct list_head *next) +++{ +++ return true; +++} +++static inline bool __list_del_entry_valid(struct list_head *entry) +++{ +++ return true; +++} +++#endif +++ ++ /* ++ * Insert a new entry between two known consecutive entries. ++ * ++ * This is only for internal list manipulation where we know ++ * the prev/next entries already! ++ */ ++-#ifndef CONFIG_DEBUG_LIST ++ static inline void __list_add(struct list_head *new, ++ struct list_head *prev, ++ struct list_head *next) ++ { +++ if (!__list_add_valid(new, prev, next)) +++ return; +++ ++ next->prev = new; ++ new->next = next; ++ new->prev = prev; ++- prev->next = new; +++ WRITE_ONCE(prev->next, new); ++ } ++-#else ++-extern void __list_add(struct list_head *new, ++- struct list_head *prev, ++- struct list_head *next); ++-#endif ++ ++ /** ++ * list_add - add a new entry ++@@ -90,28 +106,40 @@ static inline void __list_del(struct lis ++ WRITE_ONCE(prev->next, next); ++ } ++ +++/* +++ * Delete a list entry and clear the 'prev' pointer. +++ * +++ * This is a special-purpose list clearing method used in the networking code +++ * for lists allocated as per-cpu, where we don't want to incur the extra +++ * WRITE_ONCE() overhead of a regular list_del_init(). The code that uses this +++ * needs to check the node 'prev' pointer instead of calling list_empty(). +++ */ +++static inline void __list_del_clearprev(struct list_head *entry) +++{ +++ __list_del(entry->prev, entry->next); +++ entry->prev = NULL; +++} +++ ++ /** ++ * list_del - deletes entry from list. ++ * @entry: the element to delete from the list. ++ * Note: list_empty() on entry does not return true after this, the entry is ++ * in an undefined state. ++ */ ++-#ifndef CONFIG_DEBUG_LIST ++ static inline void __list_del_entry(struct list_head *entry) ++ { +++ if (!__list_del_entry_valid(entry)) +++ return; +++ ++ __list_del(entry->prev, entry->next); ++ } ++ ++ static inline void list_del(struct list_head *entry) ++ { ++- __list_del(entry->prev, entry->next); +++ __list_del_entry(entry); ++ entry->next = LIST_POISON1; ++ entry->prev = LIST_POISON2; ++ } ++-#else ++-extern void __list_del_entry(struct list_head *entry); ++-extern void list_del(struct list_head *entry); ++-#endif ++ ++ /** ++ * list_replace - replace old entry by new one ++@@ -137,6 +165,23 @@ static inline void list_replace_init(str ++ } ++ ++ /** +++ * list_swap - replace entry1 with entry2 and re-add entry1 at entry2's position +++ * @entry1: the location to place entry2 +++ * @entry2: the location to place entry1 +++ */ +++static inline void list_swap(struct list_head *entry1, +++ struct list_head *entry2) +++{ +++ struct list_head *pos = entry2->prev; +++ +++ list_del(entry2); +++ list_replace(entry1, entry2); +++ if (pos == entry1) +++ pos = entry2; +++ list_add(entry1, pos); +++} +++ +++/** ++ * list_del_init - deletes entry from list and reinitialize it. ++ * @entry: the element to delete from the list. ++ */ ++@@ -170,6 +215,40 @@ static inline void list_move_tail(struct ++ } ++ ++ /** +++ * list_bulk_move_tail - move a subsection of a list to its tail +++ * @head: the head that will follow our entry +++ * @first: first entry to move +++ * @last: last entry to move, can be the same as first +++ * +++ * Move all entries between @first and including @last before @head. +++ * All three entries must belong to the same linked list. 
+++ */ +++static inline void list_bulk_move_tail(struct list_head *head, +++ struct list_head *first, +++ struct list_head *last) +++{ +++ first->prev->next = last->next; +++ last->next->prev = first->prev; +++ +++ head->prev->next = first; +++ first->prev = head->prev; +++ +++ last->next = head; +++ head->prev = last; +++} +++ +++/** +++ * list_is_first -- tests whether @list is the first entry in list @head +++ * @list: the entry to test +++ * @head: the head of the list +++ */ +++static inline int list_is_first(const struct list_head *list, +++ const struct list_head *head) +++{ +++ return list->prev == head; +++} +++ +++/** ++ * list_is_last - tests whether @list is the last entry in list @head ++ * @list: the entry to test ++ * @head: the head of the list ++@@ -186,7 +265,7 @@ static inline int list_is_last(const str ++ */ ++ static inline int list_empty(const struct list_head *head) ++ { ++- return head->next == head; +++ return READ_ONCE(head->next) == head; ++ } ++ ++ /** ++@@ -223,6 +302,24 @@ static inline void list_rotate_left(stru ++ } ++ ++ /** +++ * list_rotate_to_front() - Rotate list to specific item. +++ * @list: The desired new front of the list. +++ * @head: The head of the list. +++ * +++ * Rotates list so that @list becomes the new front of the list. +++ */ +++static inline void list_rotate_to_front(struct list_head *list, +++ struct list_head *head) +++{ +++ /* +++ * Deletes the list head from the list denoted by @head and +++ * places it as the tail of @list, this effectively rotates the +++ * list so that @list is at the front. +++ */ +++ list_move_tail(head, list); +++} +++ +++/** ++ * list_is_singular - tests whether a list has just one entry. ++ * @head: the list to test. ++ */ ++@@ -271,6 +368,36 @@ static inline void list_cut_position(str ++ __list_cut_position(list, head, entry); ++ } ++ +++/** +++ * list_cut_before - cut a list into two, before given entry +++ * @list: a new list to add all removed entries +++ * @head: a list with entries +++ * @entry: an entry within head, could be the head itself +++ * +++ * This helper moves the initial part of @head, up to but +++ * excluding @entry, from @head to @list. You should pass +++ * in @entry an element you know is on @head. @list should +++ * be an empty list or a list you do not care about losing +++ * its data. +++ * If @entry == @head, all entries on @head are moved to +++ * @list. +++ */ +++static inline void list_cut_before(struct list_head *list, +++ struct list_head *head, +++ struct list_head *entry) +++{ +++ if (head->next == entry) { +++ INIT_LIST_HEAD(list); +++ return; +++ } +++ list->next = head->next; +++ list->next->prev = list; +++ list->prev = entry->prev; +++ list->prev->next = list; +++ head->next = entry; +++ entry->prev = head; +++} +++ ++ static inline void __list_splice(const struct list_head *list, ++ struct list_head *prev, ++ struct list_head *next) ++@@ -381,8 +508,11 @@ static inline void list_splice_tail_init ++ * ++ * Note that if the list is empty, it returns NULL. ++ */ ++-#define list_first_entry_or_null(ptr, type, member) \ ++- (!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL) +++#define list_first_entry_or_null(ptr, type, member) ({ \ +++ struct list_head *head__ = (ptr); \ +++ struct list_head *pos__ = READ_ONCE(head__->next); \ +++ pos__ != head__ ? 
list_entry(pos__, type, member) : NULL; \ +++}) ++ ++ /** ++ * list_next_entry - get the next element in list ++@@ -511,6 +641,19 @@ static inline void list_splice_tail_init ++ pos = list_next_entry(pos, member)) ++ ++ /** +++ * list_for_each_entry_from_reverse - iterate backwards over list of given type +++ * from the current point +++ * @pos: the type * to use as a loop cursor. +++ * @head: the head for your list. +++ * @member: the name of the list_head within the struct. +++ * +++ * Iterate backwards over list of given type, continuing from current position. +++ */ +++#define list_for_each_entry_from_reverse(pos, head, member) \ +++ for (; &pos->member != (head); \ +++ pos = list_prev_entry(pos, member)) +++ +++/** ++ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry ++ * @pos: the type * to use as a loop cursor. ++ * @n: another type * to use as temporary storage ++@@ -608,7 +751,7 @@ static inline int hlist_unhashed(const s ++ ++ static inline int hlist_empty(const struct hlist_head *h) ++ { ++- return !h->first; +++ return !READ_ONCE(h->first); ++ } ++ ++ static inline void __hlist_del(struct hlist_node *n) ++@@ -642,7 +785,7 @@ static inline void hlist_add_head(struct ++ n->next = first; ++ if (first) ++ first->pprev = &n->next; ++- h->first = n; +++ WRITE_ONCE(h->first, n); ++ n->pprev = &h->first; ++ } ++ ++@@ -653,7 +796,7 @@ static inline void hlist_add_before(stru ++ n->pprev = next->pprev; ++ n->next = next; ++ next->pprev = &n->next; ++- *(n->pprev) = n; +++ WRITE_ONCE(*(n->pprev), n); ++ } ++ ++ static inline void hlist_add_behind(struct hlist_node *n, ++@@ -679,6 +822,16 @@ static inline bool hlist_fake(struct hli ++ } ++ ++ /* +++ * Check whether the node is the only node of the head without +++ * accessing head: +++ */ +++static inline bool +++hlist_is_singular_node(struct hlist_node *n, struct hlist_head *h) +++{ +++ return !n->next && n->pprev == &h->first; +++} +++ +++/* ++ * Move a list from one list head to another. Fixup the pprev ++ * reference of the first entry if it exists. ++ */ ++--- /dev/null +++++ b/include/linux/ptr_ring.h ++@@ -0,0 +1,673 @@ +++/* SPDX-License-Identifier: GPL-2.0-or-later */ +++/* +++ * Definitions for the 'struct ptr_ring' datastructure. +++ * +++ * Author: +++ * Michael S. Tsirkin +++ * +++ * Copyright (C) 2016 Red Hat, Inc. +++ * +++ * This is a limited-size FIFO maintaining pointers in FIFO order, with +++ * one CPU producing entries and another consuming entries from a FIFO. +++ * +++ * This implementation tries to minimize cache-contention when there is a +++ * single producer and a single consumer CPU. +++ */ +++ +++#ifndef _LINUX_PTR_RING_H +++#define _LINUX_PTR_RING_H 1 +++ +++#ifdef __KERNEL__ +++#include +++#include +++#include +++#include +++#include +++#include +++#endif +++ +++struct ptr_ring { +++ int producer ____cacheline_aligned_in_smp; +++ spinlock_t producer_lock; +++ int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */ +++ int consumer_tail; /* next entry to invalidate */ +++ spinlock_t consumer_lock; +++ /* Shared consumer/producer data */ +++ /* Read-only by both the producer and the consumer */ +++ int size ____cacheline_aligned_in_smp; /* max entries in queue */ +++ int batch; /* number of entries to consume in a batch */ +++ void **queue; +++}; +++ +++/* Note: callers invoking this in a loop must use a compiler barrier, +++ * for example cpu_relax(). 
+++ * +++ * NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock: +++ * see e.g. ptr_ring_full. +++ */ +++static inline bool __ptr_ring_full(struct ptr_ring *r) +++{ +++ return r->queue[r->producer]; +++} +++ +++static inline bool ptr_ring_full(struct ptr_ring *r) +++{ +++ bool ret; +++ +++ spin_lock(&r->producer_lock); +++ ret = __ptr_ring_full(r); +++ spin_unlock(&r->producer_lock); +++ +++ return ret; +++} +++ +++static inline bool ptr_ring_full_irq(struct ptr_ring *r) +++{ +++ bool ret; +++ +++ spin_lock_irq(&r->producer_lock); +++ ret = __ptr_ring_full(r); +++ spin_unlock_irq(&r->producer_lock); +++ +++ return ret; +++} +++ +++static inline bool ptr_ring_full_any(struct ptr_ring *r) +++{ +++ unsigned long flags; +++ bool ret; +++ +++ spin_lock_irqsave(&r->producer_lock, flags); +++ ret = __ptr_ring_full(r); +++ spin_unlock_irqrestore(&r->producer_lock, flags); +++ +++ return ret; +++} +++ +++static inline bool ptr_ring_full_bh(struct ptr_ring *r) +++{ +++ bool ret; +++ +++ spin_lock_bh(&r->producer_lock); +++ ret = __ptr_ring_full(r); +++ spin_unlock_bh(&r->producer_lock); +++ +++ return ret; +++} +++ +++/* Note: callers invoking this in a loop must use a compiler barrier, +++ * for example cpu_relax(). Callers must hold producer_lock. +++ * Callers are responsible for making sure pointer that is being queued +++ * points to a valid data. +++ */ +++static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) +++{ +++ if (unlikely(!r->size) || r->queue[r->producer]) +++ return -ENOSPC; +++ +++ /* Make sure the pointer we are storing points to a valid data. */ +++ /* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */ +++ smp_wmb(); +++ +++ WRITE_ONCE(r->queue[r->producer++], ptr); +++ if (unlikely(r->producer >= r->size)) +++ r->producer = 0; +++ return 0; +++} +++ +++/* +++ * Note: resize (below) nests producer lock within consumer lock, so if you +++ * consume in interrupt or BH context, you must disable interrupts/BH when +++ * calling this. +++ */ +++static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr) +++{ +++ int ret; +++ +++ spin_lock(&r->producer_lock); +++ ret = __ptr_ring_produce(r, ptr); +++ spin_unlock(&r->producer_lock); +++ +++ return ret; +++} +++ +++static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr) +++{ +++ int ret; +++ +++ spin_lock_irq(&r->producer_lock); +++ ret = __ptr_ring_produce(r, ptr); +++ spin_unlock_irq(&r->producer_lock); +++ +++ return ret; +++} +++ +++static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr) +++{ +++ unsigned long flags; +++ int ret; +++ +++ spin_lock_irqsave(&r->producer_lock, flags); +++ ret = __ptr_ring_produce(r, ptr); +++ spin_unlock_irqrestore(&r->producer_lock, flags); +++ +++ return ret; +++} +++ +++static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr) +++{ +++ int ret; +++ +++ spin_lock_bh(&r->producer_lock); +++ ret = __ptr_ring_produce(r, ptr); +++ spin_unlock_bh(&r->producer_lock); +++ +++ return ret; +++} +++ +++static inline void *__ptr_ring_peek(struct ptr_ring *r) +++{ +++ if (likely(r->size)) +++ return READ_ONCE(r->queue[r->consumer_head]); +++ return NULL; +++} +++ +++/* +++ * Test ring empty status without taking any locks. +++ * +++ * NB: This is only safe to call if ring is never resized. +++ * +++ * However, if some other CPU consumes ring entries at the same time, the value +++ * returned is not guaranteed to be correct. 
+++ * +++ * In this case - to avoid incorrectly detecting the ring +++ * as empty - the CPU consuming the ring entries is responsible +++ * for either consuming all ring entries until the ring is empty, +++ * or synchronizing with some other CPU and causing it to +++ * re-test __ptr_ring_empty and/or consume the ring enteries +++ * after the synchronization point. +++ * +++ * Note: callers invoking this in a loop must use a compiler barrier, +++ * for example cpu_relax(). +++ */ +++static inline bool __ptr_ring_empty(struct ptr_ring *r) +++{ +++ if (likely(r->size)) +++ return !r->queue[READ_ONCE(r->consumer_head)]; +++ return true; +++} +++ +++static inline bool ptr_ring_empty(struct ptr_ring *r) +++{ +++ bool ret; +++ +++ spin_lock(&r->consumer_lock); +++ ret = __ptr_ring_empty(r); +++ spin_unlock(&r->consumer_lock); +++ +++ return ret; +++} +++ +++static inline bool ptr_ring_empty_irq(struct ptr_ring *r) +++{ +++ bool ret; +++ +++ spin_lock_irq(&r->consumer_lock); +++ ret = __ptr_ring_empty(r); +++ spin_unlock_irq(&r->consumer_lock); +++ +++ return ret; +++} +++ +++static inline bool ptr_ring_empty_any(struct ptr_ring *r) +++{ +++ unsigned long flags; +++ bool ret; +++ +++ spin_lock_irqsave(&r->consumer_lock, flags); +++ ret = __ptr_ring_empty(r); +++ spin_unlock_irqrestore(&r->consumer_lock, flags); +++ +++ return ret; +++} +++ +++static inline bool ptr_ring_empty_bh(struct ptr_ring *r) +++{ +++ bool ret; +++ +++ spin_lock_bh(&r->consumer_lock); +++ ret = __ptr_ring_empty(r); +++ spin_unlock_bh(&r->consumer_lock); +++ +++ return ret; +++} +++ +++/* Must only be called after __ptr_ring_peek returned !NULL */ +++static inline void __ptr_ring_discard_one(struct ptr_ring *r) +++{ +++ /* Fundamentally, what we want to do is update consumer +++ * index and zero out the entry so producer can reuse it. +++ * Doing it naively at each consume would be as simple as: +++ * consumer = r->consumer; +++ * r->queue[consumer++] = NULL; +++ * if (unlikely(consumer >= r->size)) +++ * consumer = 0; +++ * r->consumer = consumer; +++ * but that is suboptimal when the ring is full as producer is writing +++ * out new entries in the same cache line. Defer these updates until a +++ * batch of entries has been consumed. +++ */ +++ /* Note: we must keep consumer_head valid at all times for __ptr_ring_empty +++ * to work correctly. +++ */ +++ int consumer_head = r->consumer_head; +++ int head = consumer_head++; +++ +++ /* Once we have processed enough entries invalidate them in +++ * the ring all at once so producer can reuse their space in the ring. +++ * We also do this when we reach end of the ring - not mandatory +++ * but helps keep the implementation simple. +++ */ +++ if (unlikely(consumer_head - r->consumer_tail >= r->batch || +++ consumer_head >= r->size)) { +++ /* Zero out entries in the reverse order: this way we touch the +++ * cache line that producer might currently be reading the last; +++ * producer won't make progress and touch other cache lines +++ * besides the first one until we write out all entries. 
+++ */ +++ while (likely(head >= r->consumer_tail)) +++ r->queue[head--] = NULL; +++ r->consumer_tail = consumer_head; +++ } +++ if (unlikely(consumer_head >= r->size)) { +++ consumer_head = 0; +++ r->consumer_tail = 0; +++ } +++ /* matching READ_ONCE in __ptr_ring_empty for lockless tests */ +++ WRITE_ONCE(r->consumer_head, consumer_head); +++} +++ +++static inline void *__ptr_ring_consume(struct ptr_ring *r) +++{ +++ void *ptr; +++ +++ /* The READ_ONCE in __ptr_ring_peek guarantees that anyone +++ * accessing data through the pointer is up to date. Pairs +++ * with smp_wmb in __ptr_ring_produce. +++ */ +++ ptr = __ptr_ring_peek(r); +++ if (ptr) +++ __ptr_ring_discard_one(r); +++ +++ return ptr; +++} +++ +++static inline int __ptr_ring_consume_batched(struct ptr_ring *r, +++ void **array, int n) +++{ +++ void *ptr; +++ int i; +++ +++ for (i = 0; i < n; i++) { +++ ptr = __ptr_ring_consume(r); +++ if (!ptr) +++ break; +++ array[i] = ptr; +++ } +++ +++ return i; +++} +++ +++/* +++ * Note: resize (below) nests producer lock within consumer lock, so if you +++ * call this in interrupt or BH context, you must disable interrupts/BH when +++ * producing. +++ */ +++static inline void *ptr_ring_consume(struct ptr_ring *r) +++{ +++ void *ptr; +++ +++ spin_lock(&r->consumer_lock); +++ ptr = __ptr_ring_consume(r); +++ spin_unlock(&r->consumer_lock); +++ +++ return ptr; +++} +++ +++static inline void *ptr_ring_consume_irq(struct ptr_ring *r) +++{ +++ void *ptr; +++ +++ spin_lock_irq(&r->consumer_lock); +++ ptr = __ptr_ring_consume(r); +++ spin_unlock_irq(&r->consumer_lock); +++ +++ return ptr; +++} +++ +++static inline void *ptr_ring_consume_any(struct ptr_ring *r) +++{ +++ unsigned long flags; +++ void *ptr; +++ +++ spin_lock_irqsave(&r->consumer_lock, flags); +++ ptr = __ptr_ring_consume(r); +++ spin_unlock_irqrestore(&r->consumer_lock, flags); +++ +++ return ptr; +++} +++ +++static inline void *ptr_ring_consume_bh(struct ptr_ring *r) +++{ +++ void *ptr; +++ +++ spin_lock_bh(&r->consumer_lock); +++ ptr = __ptr_ring_consume(r); +++ spin_unlock_bh(&r->consumer_lock); +++ +++ return ptr; +++} +++ +++static inline int ptr_ring_consume_batched(struct ptr_ring *r, +++ void **array, int n) +++{ +++ int ret; +++ +++ spin_lock(&r->consumer_lock); +++ ret = __ptr_ring_consume_batched(r, array, n); +++ spin_unlock(&r->consumer_lock); +++ +++ return ret; +++} +++ +++static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r, +++ void **array, int n) +++{ +++ int ret; +++ +++ spin_lock_irq(&r->consumer_lock); +++ ret = __ptr_ring_consume_batched(r, array, n); +++ spin_unlock_irq(&r->consumer_lock); +++ +++ return ret; +++} +++ +++static inline int ptr_ring_consume_batched_any(struct ptr_ring *r, +++ void **array, int n) +++{ +++ unsigned long flags; +++ int ret; +++ +++ spin_lock_irqsave(&r->consumer_lock, flags); +++ ret = __ptr_ring_consume_batched(r, array, n); +++ spin_unlock_irqrestore(&r->consumer_lock, flags); +++ +++ return ret; +++} +++ +++static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r, +++ void **array, int n) +++{ +++ int ret; +++ +++ spin_lock_bh(&r->consumer_lock); +++ ret = __ptr_ring_consume_batched(r, array, n); +++ spin_unlock_bh(&r->consumer_lock); +++ +++ return ret; +++} +++ +++/* Cast to structure type and call a function without discarding from FIFO. +++ * Function must return a value. +++ * Callers must take consumer_lock. 
+++ */ +++#define __PTR_RING_PEEK_CALL(r, f) ((f)(__ptr_ring_peek(r))) +++ +++#define PTR_RING_PEEK_CALL(r, f) ({ \ +++ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ +++ \ +++ spin_lock(&(r)->consumer_lock); \ +++ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ +++ spin_unlock(&(r)->consumer_lock); \ +++ __PTR_RING_PEEK_CALL_v; \ +++}) +++ +++#define PTR_RING_PEEK_CALL_IRQ(r, f) ({ \ +++ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ +++ \ +++ spin_lock_irq(&(r)->consumer_lock); \ +++ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ +++ spin_unlock_irq(&(r)->consumer_lock); \ +++ __PTR_RING_PEEK_CALL_v; \ +++}) +++ +++#define PTR_RING_PEEK_CALL_BH(r, f) ({ \ +++ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ +++ \ +++ spin_lock_bh(&(r)->consumer_lock); \ +++ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ +++ spin_unlock_bh(&(r)->consumer_lock); \ +++ __PTR_RING_PEEK_CALL_v; \ +++}) +++ +++#define PTR_RING_PEEK_CALL_ANY(r, f) ({ \ +++ typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \ +++ unsigned long __PTR_RING_PEEK_CALL_f;\ +++ \ +++ spin_lock_irqsave(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \ +++ __PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \ +++ spin_unlock_irqrestore(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \ +++ __PTR_RING_PEEK_CALL_v; \ +++}) +++ +++/* Not all gfp_t flags (besides GFP_KERNEL) are allowed. See +++ * documentation for vmalloc for which of them are legal. +++ */ +++static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp) +++{ +++ if (size > KMALLOC_MAX_SIZE / sizeof(void *)) +++ return NULL; +++ return kmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO); +++} +++ +++static inline void __ptr_ring_set_size(struct ptr_ring *r, int size) +++{ +++ r->size = size; +++ r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue)); +++ /* We need to set batch at least to 1 to make logic +++ * in __ptr_ring_discard_one work correctly. +++ * Batching too much (because ring is small) would cause a lot of +++ * burstiness. Needs tuning, for now disable batching. +++ */ +++ if (r->batch > r->size / 2 || !r->batch) +++ r->batch = 1; +++} +++ +++static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) +++{ +++ r->queue = __ptr_ring_init_queue_alloc(size, gfp); +++ if (!r->queue) +++ return -ENOMEM; +++ +++ __ptr_ring_set_size(r, size); +++ r->producer = r->consumer_head = r->consumer_tail = 0; +++ spin_lock_init(&r->producer_lock); +++ spin_lock_init(&r->consumer_lock); +++ +++ return 0; +++} +++ +++/* +++ * Return entries into ring. Destroy entries that don't fit. +++ * +++ * Note: this is expected to be a rare slow path operation. +++ * +++ * Note: producer lock is nested within consumer lock, so if you +++ * resize you must make sure all uses nest correctly. +++ * In particular if you consume ring in interrupt or BH context, you must +++ * disable interrupts/BH when doing so. +++ */ +++static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n, +++ void (*destroy)(void *)) +++{ +++ unsigned long flags; +++ int head; +++ +++ spin_lock_irqsave(&r->consumer_lock, flags); +++ spin_lock(&r->producer_lock); +++ +++ if (!r->size) +++ goto done; +++ +++ /* +++ * Clean out buffered entries (for simplicity). This way following code +++ * can test entries for NULL and if not assume they are valid. 
+++ */ +++ head = r->consumer_head - 1; +++ while (likely(head >= r->consumer_tail)) +++ r->queue[head--] = NULL; +++ r->consumer_tail = r->consumer_head; +++ +++ /* +++ * Go over entries in batch, start moving head back and copy entries. +++ * Stop when we run into previously unconsumed entries. +++ */ +++ while (n) { +++ head = r->consumer_head - 1; +++ if (head < 0) +++ head = r->size - 1; +++ if (r->queue[head]) { +++ /* This batch entry will have to be destroyed. */ +++ goto done; +++ } +++ r->queue[head] = batch[--n]; +++ r->consumer_tail = head; +++ /* matching READ_ONCE in __ptr_ring_empty for lockless tests */ +++ WRITE_ONCE(r->consumer_head, head); +++ } +++ +++done: +++ /* Destroy all entries left in the batch. */ +++ while (n) +++ destroy(batch[--n]); +++ spin_unlock(&r->producer_lock); +++ spin_unlock_irqrestore(&r->consumer_lock, flags); +++} +++ +++static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue, +++ int size, gfp_t gfp, +++ void (*destroy)(void *)) +++{ +++ int producer = 0; +++ void **old; +++ void *ptr; +++ +++ while ((ptr = __ptr_ring_consume(r))) +++ if (producer < size) +++ queue[producer++] = ptr; +++ else if (destroy) +++ destroy(ptr); +++ +++ if (producer >= size) +++ producer = 0; +++ __ptr_ring_set_size(r, size); +++ r->producer = producer; +++ r->consumer_head = 0; +++ r->consumer_tail = 0; +++ old = r->queue; +++ r->queue = queue; +++ +++ return old; +++} +++ +++/* +++ * Note: producer lock is nested within consumer lock, so if you +++ * resize you must make sure all uses nest correctly. +++ * In particular if you consume ring in interrupt or BH context, you must +++ * disable interrupts/BH when doing so. +++ */ +++static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, +++ void (*destroy)(void *)) +++{ +++ unsigned long flags; +++ void **queue = __ptr_ring_init_queue_alloc(size, gfp); +++ void **old; +++ +++ if (!queue) +++ return -ENOMEM; +++ +++ spin_lock_irqsave(&(r)->consumer_lock, flags); +++ spin_lock(&(r)->producer_lock); +++ +++ old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy); +++ +++ spin_unlock(&(r)->producer_lock); +++ spin_unlock_irqrestore(&(r)->consumer_lock, flags); +++ +++ kvfree(old); +++ +++ return 0; +++} +++ +++/* +++ * Note: producer lock is nested within consumer lock, so if you +++ * resize you must make sure all uses nest correctly. +++ * In particular if you consume ring in interrupt or BH context, you must +++ * disable interrupts/BH when doing so. 
+++ */ +++static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, +++ unsigned int nrings, +++ int size, +++ gfp_t gfp, void (*destroy)(void *)) +++{ +++ unsigned long flags; +++ void ***queues; +++ int i; +++ +++ queues = kmalloc_array(nrings, sizeof(*queues), gfp); +++ if (!queues) +++ goto noqueues; +++ +++ for (i = 0; i < nrings; ++i) { +++ queues[i] = __ptr_ring_init_queue_alloc(size, gfp); +++ if (!queues[i]) +++ goto nomem; +++ } +++ +++ for (i = 0; i < nrings; ++i) { +++ spin_lock_irqsave(&(rings[i])->consumer_lock, flags); +++ spin_lock(&(rings[i])->producer_lock); +++ queues[i] = __ptr_ring_swap_queue(rings[i], queues[i], +++ size, gfp, destroy); +++ spin_unlock(&(rings[i])->producer_lock); +++ spin_unlock_irqrestore(&(rings[i])->consumer_lock, flags); +++ } +++ +++ for (i = 0; i < nrings; ++i) +++ kvfree(queues[i]); +++ +++ kfree(queues); +++ +++ return 0; +++ +++nomem: +++ while (--i >= 0) +++ kvfree(queues[i]); +++ +++ kfree(queues); +++ +++noqueues: +++ return -ENOMEM; +++} +++ +++static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *)) +++{ +++ void *ptr; +++ +++ if (destroy) +++ while ((ptr = ptr_ring_consume(r))) +++ destroy(ptr); +++ kvfree(r->queue); +++} +++ +++#endif /* _LINUX_PTR_RING_H */ ++--- a/include/linux/skbuff.h +++++ b/include/linux/skbuff.h ++@@ -37,6 +37,7 @@ ++ #include ++ #include ++ #include +++#include ++ #include ++ ++ /* A. Checksumming of received packets by device. ++@@ -592,13 +593,23 @@ struct sk_buff { ++ */ ++ kmemcheck_bitfield_begin(flags1); ++ __u16 queue_mapping; +++ +++/* if you move cloned around you also must adapt those constants */ +++#ifdef __BIG_ENDIAN_BITFIELD +++#define CLONED_MASK (1 << 7) +++#else +++#define CLONED_MASK 1 +++#endif +++#define CLONED_OFFSET() offsetof(struct sk_buff, __cloned_offset) +++ +++ __u8 __cloned_offset[0]; ++ __u8 cloned:1, ++ nohdr:1, ++ fclone:2, ++ peeked:1, ++ head_frag:1, ++- xmit_more:1; ++- /* one bit hole */ +++ xmit_more:1, +++ __unused:1; /* one bit hole */ ++ kmemcheck_bitfield_end(flags1); ++ ++ /* fields enclosed in headers_start/headers_end are copied ++@@ -639,6 +650,14 @@ struct sk_buff { ++ __u8 csum_level:2; ++ __u8 csum_bad:1; ++ +++#ifdef __BIG_ENDIAN_BITFIELD +++#define PKT_VLAN_PRESENT_BIT 7 +++#else +++#define PKT_VLAN_PRESENT_BIT 0 +++#endif +++#define PKT_VLAN_PRESENT_OFFSET() offsetof(struct sk_buff, __pkt_vlan_present_offset) +++ __u8 __pkt_vlan_present_offset[0]; +++ __u8 vlan_present:1; ++ #ifdef CONFIG_IPV6_NDISC_NODETYPE ++ __u8 ndisc_nodetype:2; ++ #endif ++@@ -647,7 +666,7 @@ struct sk_buff { ++ __u8 remcsum_offload:1; ++ __u8 gro_skip:1; ++ __u8 fast_forwarded:1; ++- /* 1 or 3 bit hole */ +++ /* 0 or 2 bit hole */ ++ ++ #ifdef CONFIG_NET_SCHED ++ __u16 tc_index; /* traffic control index */ ++@@ -805,6 +824,15 @@ static inline struct rtable *skb_rtable( ++ return (struct rtable *)skb_dst(skb); ++ } ++ +++/* For mangling skb->pkt_type from user space side from applications +++ * such as nft, tc, etc, we only allow a conservative subset of +++ * possible pkt_types to be set. 
+++*/ +++static inline bool skb_pkt_type_ok(u32 ptype) +++{ +++ return ptype <= PACKET_OTHERHOST; +++} +++ ++ void kfree_skb(struct sk_buff *skb); ++ void kfree_skb_list(struct sk_buff *segs); ++ void skb_tx_error(struct sk_buff *skb); ++@@ -2127,6 +2155,11 @@ static inline unsigned char *skb_mac_hea ++ return skb->head + skb->mac_header; ++ } ++ +++static inline u32 skb_mac_header_len(const struct sk_buff *skb) +++{ +++ return skb->network_header - skb->mac_header; +++} +++ ++ static inline int skb_mac_header_was_set(const struct sk_buff *skb) ++ { ++ return skb->mac_header != (typeof(skb->mac_header))~0U; ++@@ -2256,7 +2289,7 @@ static inline int pskb_network_may_pull( ++ ++ int ___pskb_trim(struct sk_buff *skb, unsigned int len); ++ ++-static inline void __skb_trim(struct sk_buff *skb, unsigned int len) +++static inline void __skb_set_length(struct sk_buff *skb, unsigned int len) ++ { ++ if (unlikely(skb_is_nonlinear(skb))) { ++ WARN_ON(1); ++@@ -2266,6 +2299,11 @@ static inline void __skb_trim(struct sk_ ++ skb_set_tail_pointer(skb, len); ++ } ++ +++static inline void __skb_trim(struct sk_buff *skb, unsigned int len) +++{ +++ __skb_set_length(skb, len); +++} +++ ++ void skb_trim(struct sk_buff *skb, unsigned int len); ++ ++ static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) ++@@ -2318,6 +2356,20 @@ static inline struct sk_buff *skb_reduce ++ return skb; ++ } ++ +++static inline int __skb_grow(struct sk_buff *skb, unsigned int len) +++{ +++ unsigned int diff = len - skb->len; +++ +++ if (skb_tailroom(skb) < diff) { +++ int ret = pskb_expand_head(skb, 0, diff - skb_tailroom(skb), +++ GFP_ATOMIC); +++ if (ret) +++ return ret; +++ } +++ __skb_set_length(skb, len); +++ return 0; +++} +++ ++ /** ++ * skb_orphan - orphan a buffer ++ * @skb: buffer to orphan ++@@ -2818,6 +2870,18 @@ static inline int skb_linearize_cow(stru ++ __skb_linearize(skb) : 0; ++ } ++ +++static __always_inline void +++__skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len, +++ unsigned int off) +++{ +++ if (skb->ip_summed == CHECKSUM_COMPLETE) +++ skb->csum = csum_block_sub(skb->csum, +++ csum_partial(start, len, 0), off); +++ else if (skb->ip_summed == CHECKSUM_PARTIAL && +++ skb_checksum_start_offset(skb) < 0) +++ skb->ip_summed = CHECKSUM_NONE; +++} +++ ++ /** ++ * skb_postpull_rcsum - update checksum for received skb after pull ++ * @skb: buffer to update ++@@ -2828,36 +2892,38 @@ static inline int skb_linearize_cow(stru ++ * update the CHECKSUM_COMPLETE checksum, or set ip_summed to ++ * CHECKSUM_NONE so that it can be recomputed from scratch. 
++ */ ++- ++ static inline void skb_postpull_rcsum(struct sk_buff *skb, ++ const void *start, unsigned int len) ++ { ++- if (skb->ip_summed == CHECKSUM_COMPLETE) ++- skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); ++- else if (skb->ip_summed == CHECKSUM_PARTIAL && ++- skb_checksum_start_offset(skb) < 0) ++- skb->ip_summed = CHECKSUM_NONE; +++ __skb_postpull_rcsum(skb, start, len, 0); ++ } ++ ++-unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); +++static __always_inline void +++__skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len, +++ unsigned int off) +++{ +++ if (skb->ip_summed == CHECKSUM_COMPLETE) +++ skb->csum = csum_block_add(skb->csum, +++ csum_partial(start, len, 0), off); +++} ++ +++/** +++ * skb_postpush_rcsum - update checksum for received skb after push +++ * @skb: buffer to update +++ * @start: start of data after push +++ * @len: length of data pushed +++ * +++ * After doing a push on a received packet, you need to call this to +++ * update the CHECKSUM_COMPLETE checksum. +++ */ ++ static inline void skb_postpush_rcsum(struct sk_buff *skb, ++ const void *start, unsigned int len) ++ { ++- /* For performing the reverse operation to skb_postpull_rcsum(), ++- * we can instead of ... ++- * ++- * skb->csum = csum_add(skb->csum, csum_partial(start, len, 0)); ++- * ++- * ... just use this equivalent version here to save a few ++- * instructions. Feeding csum of 0 in csum_partial() and later ++- * on adding skb->csum is equivalent to feed skb->csum in the ++- * first place. ++- */ ++- if (skb->ip_summed == CHECKSUM_COMPLETE) ++- skb->csum = csum_partial(start, len, skb->csum); +++ __skb_postpush_rcsum(skb, start, len, 0); ++ } ++ +++unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); +++ ++ /** ++ * skb_push_rcsum - push skb and update receive checksum ++ * @skb: buffer to update ++@@ -2901,6 +2967,21 @@ static inline int pskb_trim_rcsum(struct ++ #define skb_rb_next(skb) rb_to_skb(rb_next(&(skb)->rbnode)) ++ #define skb_rb_prev(skb) rb_to_skb(rb_prev(&(skb)->rbnode)) ++ +++static inline int __skb_trim_rcsum(struct sk_buff *skb, unsigned int len) +++{ +++ if (skb->ip_summed == CHECKSUM_COMPLETE) +++ skb->ip_summed = CHECKSUM_NONE; +++ __skb_trim(skb, len); +++ return 0; +++} +++ +++static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len) +++{ +++ if (skb->ip_summed == CHECKSUM_COMPLETE) +++ skb->ip_summed = CHECKSUM_NONE; +++ return __skb_grow(skb, len); +++} +++ ++ #define skb_queue_walk(queue, skb) \ ++ for (skb = (queue)->next; \ ++ skb != (struct sk_buff *)(queue); \ ++@@ -3662,6 +3743,13 @@ static inline bool skb_is_gso_v6(const s ++ return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; ++ } ++ +++static inline void skb_gso_reset(struct sk_buff *skb) +++{ +++ skb_shinfo(skb)->gso_size = 0; +++ skb_shinfo(skb)->gso_segs = 0; +++ skb_shinfo(skb)->gso_type = 0; +++} +++ ++ void __skb_warn_lro_forwarding(const struct sk_buff *skb); ++ ++ static inline bool skb_warn_if_lro(const struct sk_buff *skb) ++--- a/include/linux/if_arp.h +++++ b/include/linux/if_arp.h ++@@ -44,4 +44,21 @@ static inline int arp_hdr_len(struct net ++ return sizeof(struct arphdr) + (dev->addr_len + sizeof(u32)) * 2; ++ } ++ } +++ +++static inline bool dev_is_mac_header_xmit(const struct net_device *dev) +++{ +++ switch (dev->type) { +++ case ARPHRD_TUNNEL: +++ case ARPHRD_TUNNEL6: +++ case ARPHRD_SIT: +++ case ARPHRD_IPGRE: +++ case ARPHRD_VOID: +++ case ARPHRD_NONE: +++ case ARPHRD_RAWIP: +++ return false; +++ default: +++ 
return true; +++ } +++} +++ ++ #endif /* _LINUX_IF_ARP_H */ ++--- a/include/linux/if_vlan.h +++++ b/include/linux/if_vlan.h ++@@ -66,7 +66,6 @@ static inline struct vlan_ethhdr *vlan_e ++ #define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */ ++ #define VLAN_PRIO_SHIFT 13 ++ #define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator */ ++-#define VLAN_TAG_PRESENT VLAN_CFI_MASK ++ #define VLAN_VID_MASK 0x0fff /* VLAN Identifier */ ++ #define VLAN_N_VID 4096 ++ ++@@ -78,8 +77,8 @@ static inline bool is_vlan_dev(struct ne ++ return dev->priv_flags & IFF_802_1Q_VLAN; ++ } ++ ++-#define skb_vlan_tag_present(__skb) ((__skb)->vlan_tci & VLAN_TAG_PRESENT) ++-#define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) +++#define skb_vlan_tag_present(__skb) ((__skb)->vlan_present) +++#define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci) ++ #define skb_vlan_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) ++ ++ /** ++@@ -376,6 +375,31 @@ static inline struct sk_buff *vlan_inser ++ return skb; ++ } ++ +++/** +++ * __vlan_hwaccel_clear_tag - clear hardware accelerated VLAN info +++ * @skb: skbuff to clear +++ * +++ * Clears the VLAN information from @skb +++ */ +++static inline void __vlan_hwaccel_clear_tag(struct sk_buff *skb) +++{ +++ skb->vlan_present = 0; +++} +++ +++/** +++ * __vlan_hwaccel_copy_tag - copy hardware accelerated VLAN info from another skb +++ * @dst: skbuff to copy to +++ * @src: skbuff to copy from +++ * +++ * Copies VLAN information from @src to @dst (for branchless code) +++ */ +++static inline void __vlan_hwaccel_copy_tag(struct sk_buff *dst, const struct sk_buff *src) +++{ +++ dst->vlan_present = src->vlan_present; +++ dst->vlan_proto = src->vlan_proto; +++ dst->vlan_tci = src->vlan_tci; +++} +++ ++ /* ++ * __vlan_hwaccel_push_inside - pushes vlan tag to the payload ++ * @skb: skbuff to tag ++@@ -390,7 +414,7 @@ static inline struct sk_buff *__vlan_hwa ++ skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto, ++ skb_vlan_tag_get(skb)); ++ if (likely(skb)) ++- skb->vlan_tci = 0; +++ __vlan_hwaccel_clear_tag(skb); ++ return skb; ++ } ++ /* ++@@ -422,7 +446,8 @@ static inline void __vlan_hwaccel_put_ta ++ __be16 vlan_proto, u16 vlan_tci) ++ { ++ skb->vlan_proto = vlan_proto; ++- skb->vlan_tci = VLAN_TAG_PRESENT | vlan_tci; +++ skb->vlan_tci = vlan_tci; +++ skb->vlan_present = 1; ++ } ++ ++ /** ++--- a/include/net/checksum.h +++++ b/include/net/checksum.h ++@@ -120,6 +120,11 @@ static inline __wsum csum_partial_ext(co ++ ++ #define CSUM_MANGLED_0 ((__force __sum16)0xffff) ++ +++static inline void csum_replace_by_diff(__sum16 *sum, __wsum diff) +++{ +++ *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum))); +++} +++ ++ static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) ++ { ++ __wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from); ++--- a/lib/test_bpf.c +++++ b/lib/test_bpf.c ++@@ -38,6 +38,7 @@ ++ #define SKB_HASH 0x1234aaab ++ #define SKB_QUEUE_MAP 123 ++ #define SKB_VLAN_TCI 0xffff +++#define SKB_VLAN_PRESENT 1 ++ #define SKB_DEV_IFINDEX 577 ++ #define SKB_DEV_TYPE 588 ++ ++@@ -691,8 +692,8 @@ static struct bpf_test tests[] = { ++ CLASSIC, ++ { }, ++ { ++- { 1, SKB_VLAN_TCI & ~VLAN_TAG_PRESENT }, ++- { 10, SKB_VLAN_TCI & ~VLAN_TAG_PRESENT } +++ { 1, SKB_VLAN_TCI }, +++ { 10, SKB_VLAN_TCI } ++ }, ++ }, ++ { ++@@ -705,8 +706,8 @@ static struct bpf_test tests[] = { ++ CLASSIC, ++ { }, ++ { ++- { 1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }, ++- { 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) } +++ { 1, SKB_VLAN_PRESENT }, +++ { 10, SKB_VLAN_PRESENT 
} ++ }, ++ }, ++ { ++@@ -4432,8 +4433,8 @@ static struct bpf_test tests[] = { ++ CLASSIC, ++ { }, ++ { ++- { 1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }, ++- { 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) } +++ { 1, SKB_VLAN_PRESENT }, +++ { 10, SKB_VLAN_PRESENT } ++ }, ++ .fill_helper = bpf_fill_maxinsns6, ++ }, ++@@ -5144,6 +5145,7 @@ static struct sk_buff *populate_skb(char ++ skb->hash = SKB_HASH; ++ skb->queue_mapping = SKB_QUEUE_MAP; ++ skb->vlan_tci = SKB_VLAN_TCI; +++ skb->vlan_present = SKB_VLAN_PRESENT; ++ skb->dev = &dev; ++ skb->dev->ifindex = SKB_DEV_IFINDEX; ++ skb->dev->type = SKB_DEV_TYPE; ++--- a/include/linux/netdevice.h +++++ b/include/linux/netdevice.h ++@@ -3171,6 +3171,21 @@ int __dev_forward_skb(struct net_device ++ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); ++ bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb); ++ +++static __always_inline int ____dev_forward_skb(struct net_device *dev, +++ struct sk_buff *skb) +++{ +++ if (skb_orphan_frags(skb, GFP_ATOMIC) || +++ unlikely(!is_skb_forwardable(dev, skb))) { +++ atomic_long_inc(&dev->rx_dropped); +++ kfree_skb(skb); +++ return NET_RX_DROP; +++ } +++ +++ skb_scrub_packet(skb, true); +++ skb->priority = 0; +++ return 0; +++} +++ ++ extern int netdev_budget; ++ ++ /* Called by rtnetlink.c:rtnl_unlock() */ ++--- a/net/openvswitch/actions.c +++++ b/net/openvswitch/actions.c ++@@ -246,7 +246,7 @@ static int push_vlan(struct sk_buff *skb ++ else ++ key->eth.tci = vlan->vlan_tci; ++ return skb_vlan_push(skb, vlan->vlan_tpid, ++- ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); +++ ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK); ++ } ++ ++ /* 'src' is already properly masked. */ ++--- a/net/openvswitch/flow.c +++++ b/net/openvswitch/flow.c ++@@ -318,7 +318,7 @@ static int parse_vlan(struct sk_buff *sk ++ return -ENOMEM; ++ ++ qp = (struct qtag_prefix *) skb->data; ++- key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT); +++ key->eth.tci = qp->tci | htons(VLAN_CFI_MASK); ++ __skb_pull(skb, sizeof(struct qtag_prefix)); ++ ++ return 0; ++--- a/net/openvswitch/flow.h +++++ b/net/openvswitch/flow.h ++@@ -69,7 +69,7 @@ struct sw_flow_key { ++ struct { ++ u8 src[ETH_ALEN]; /* Ethernet source address. */ ++ u8 dst[ETH_ALEN]; /* Ethernet destination address. */ ++- __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */ +++ __be16 tci; /* 0 if no VLAN, VLAN_CFI_MASK set otherwise. */ ++ __be16 type; /* Ethernet frame type. 
*/ ++ } eth; ++ union { ++--- a/net/openvswitch/flow_netlink.c +++++ b/net/openvswitch/flow_netlink.c ++@@ -925,11 +925,11 @@ static int ovs_key_from_nlattrs(struct n ++ __be16 tci; ++ ++ tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); ++- if (!(tci & htons(VLAN_TAG_PRESENT))) { +++ if (!(tci & htons(VLAN_CFI_MASK))) { ++ if (is_mask) ++- OVS_NLERR(log, "VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit."); +++ OVS_NLERR(log, "VLAN TCI mask does not have exact match for VLAN_CFI_MASK bit."); ++ else ++- OVS_NLERR(log, "VLAN TCI does not have VLAN_TAG_PRESENT bit set."); +++ OVS_NLERR(log, "VLAN TCI does not have VLAN_CFI_MASK bit set."); ++ ++ return -EINVAL; ++ } ++@@ -1209,7 +1209,7 @@ int ovs_nla_get_match(struct net *net, s ++ key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); ++ encap_valid = true; ++ ++- if (tci & htons(VLAN_TAG_PRESENT)) { +++ if (tci & htons(VLAN_CFI_MASK)) { ++ err = parse_flow_nlattrs(encap, a, &key_attrs, log); ++ if (err) ++ return err; ++@@ -1297,7 +1297,7 @@ int ovs_nla_get_match(struct net *net, s ++ if (a[OVS_KEY_ATTR_VLAN]) ++ tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); ++ ++- if (!(tci & htons(VLAN_TAG_PRESENT))) { +++ if (!(tci & htons(VLAN_CFI_MASK))) { ++ OVS_NLERR(log, "VLAN tag present bit must have an exact match (tci_mask=%x).", ++ ntohs(tci)); ++ err = -EINVAL; ++@@ -2272,7 +2272,7 @@ static int __ovs_nla_copy_actions(struct ++ vlan = nla_data(a); ++ if (vlan->vlan_tpid != htons(ETH_P_8021Q)) ++ return -EINVAL; ++- if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) +++ if (!(vlan->vlan_tci & htons(VLAN_CFI_MASK))) ++ return -EINVAL; ++ vlan_tci = vlan->vlan_tci; ++ break; ++@@ -2288,7 +2288,7 @@ static int __ovs_nla_copy_actions(struct ++ /* Prohibit push MPLS other than to a white list ++ * for packets that have a known tag order. ++ */ ++- if (vlan_tci & htons(VLAN_TAG_PRESENT) || +++ if (vlan_tci & htons(VLAN_CFI_MASK) || ++ (eth_type != htons(ETH_P_IP) && ++ eth_type != htons(ETH_P_IPV6) && ++ eth_type != htons(ETH_P_ARP) && ++@@ -2300,7 +2300,7 @@ static int __ovs_nla_copy_actions(struct ++ } ++ ++ case OVS_ACTION_ATTR_POP_MPLS: ++- if (vlan_tci & htons(VLAN_TAG_PRESENT) || +++ if (vlan_tci & htons(VLAN_CFI_MASK) || ++ !eth_p_mpls(eth_type)) ++ return -EINVAL; ++ ++--- a/net/sched/act_bpf.c +++++ b/net/sched/act_bpf.c ++@@ -220,7 +220,7 @@ static int tcf_bpf_init_from_efd(struct ++ ++ bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]); ++ ++- fp = bpf_prog_get(bpf_fd); +++ fp = bpf_prog_get_type_dev(bpf_fd, BPF_PROG_TYPE_SCHED_ACT, false); ++ if (IS_ERR(fp)) ++ return PTR_ERR(fp); ++ ++--- a/net/sched/cls_bpf.c +++++ b/net/sched/cls_bpf.c ++@@ -267,7 +267,7 @@ static int cls_bpf_prog_from_efd(struct ++ ++ bpf_fd = nla_get_u32(tb[TCA_BPF_FD]); ++ ++- fp = bpf_prog_get(bpf_fd); +++ fp = bpf_prog_get_type_dev(bpf_fd, BPF_PROG_TYPE_SCHED_CLS, false); ++ if (IS_ERR(fp)) ++ return PTR_ERR(fp); ++ diff --git a/toolchain/kernel-headers/Makefile b/toolchain/kernel-headers/Makefile index c33f26d46d..06236b5a47 100644 --- a/toolchain/kernel-headers/Makefile