Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Author:     Jakub Kicinski <kuba@kernel.org>
AuthorDate: Mon, 12 Oct 2020 23:16:50 +0000 (16:16 -0700)
Commit:     Jakub Kicinski <kuba@kernel.org>
CommitDate: Mon, 12 Oct 2020 23:16:50 +0000 (16:16 -0700)
Alexei Starovoitov says:

====================
pull-request: bpf-next 2020-10-12

The main changes are:

1) BPF verifier improvements to track register allocation patterns, from Alexei and Yonghong.

2) libbpf relocation support for different size load/store, from Andrii.

3) bpf_redirect_peer() helper and support for inner map arrays with different max_entries, from Daniel (a usage sketch of the new helper is included below the sign-off).

4) BPF support for per-cpu variables, from Hao (a typed-ksym sketch is included below the file list).

5) sockmap improvements, from John.
====================

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
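
A minimal, illustrative tc ingress program exercising the new
bpf_redirect_peer() helper from item 3) might look like the sketch
below. It is not part of this series: the ifindex value and section
name are placeholders, and it assumes UAPI headers and a libbpf that
already carry the helper definition. As the skb_do_redirect() change
in net/core/filter.c shows, the helper only works at tc ingress and
only when the peer device lives in a different netns.

  // SPDX-License-Identifier: GPL-2.0
  /* Sketch only: redirect ingress traffic straight to the peer device
   * (e.g. the host side of a container veth) without a second
   * traversal of the software stack.
   */
  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  #define IFINDEX_PEER 4          /* illustrative target ifindex */

  SEC("classifier")
  int tc_redirect_peer(struct __sk_buff *skb)
  {
          /* flags must be 0; the helper returns TC_ACT_REDIRECT on
           * success and TC_ACT_SHOT otherwise.
           */
          return bpf_redirect_peer(IFINDEX_PEER, 0);
  }

  char _license[] SEC("license") = "GPL";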
MAINTAINERS
include/linux/netdevice.h
kernel/bpf/verifier.c
net/core/dev.c
net/core/filter.c
tools/lib/bpf/libbpf.c

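The verifier changes below (BPF_PSEUDO_BTF_ID ld_imm64 handling,
PTR_TO_PERCPU_BTF_ID) together with the libbpf typed-ksym support
enable item 4): referencing per-CPU kernel variables by name. A rough
usage sketch, assuming a vmlinux.h generated from kernel BTF and using
bpf_prog_active purely as an example per-CPU symbol:

  // SPDX-License-Identifier: GPL-2.0
  /* Sketch only: read a per-CPU kernel variable through a typed ksym. */
  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>

  extern const int bpf_prog_active __ksym;  /* resolved against kernel BTF */

  SEC("raw_tp/sys_enter")
  int dump_prog_active(void *ctx)
  {
          __u32 cpu = bpf_get_smp_processor_id();
          const int *active;

          /* bpf_per_cpu_ptr() may return NULL, e.g. for an invalid cpu. */
          active = bpf_per_cpu_ptr(&bpf_prog_active, cpu);
          if (active)
                  bpf_printk("cpu%u: bpf_prog_active=%d", cpu, *active);
          return 0;
  }

  char _license[] SEC("license") = "GPL";
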
diff --combined MAINTAINERS
@@@ -1460,11 -1460,6 +1460,11 @@@ S:    Odd Fixe
  F:    drivers/amba/
  F:    include/linux/amba/bus.h
  
 +ARM PRIMECELL CLCD PL110 DRIVER
 +M:    Russell King <linux@armlinux.org.uk>
 +S:    Odd Fixes
 +F:    drivers/video/fbdev/amba-clcd.*
 +
  ARM PRIMECELL KMI PL050 DRIVER
  M:    Russell King <linux@armlinux.org.uk>
  S:    Odd Fixes
@@@ -3263,7 -3258,7 +3263,7 @@@ M:      Daniel Borkmann <daniel@iogearbox.ne
  R:    Martin KaFai Lau <kafai@fb.com>
  R:    Song Liu <songliubraving@fb.com>
  R:    Yonghong Song <yhs@fb.com>
- R:    Andrii Nakryiko <andriin@fb.com>
+ R:    Andrii Nakryiko <andrii@kernel.org>
  R:    John Fastabend <john.fastabend@gmail.com>
  R:    KP Singh <kpsingh@chromium.org>
  L:    netdev@vger.kernel.org
@@@ -3917,7 -3912,6 +3917,7 @@@ F:      include/net/netns/can.
  F:    include/uapi/linux/can.h
  F:    include/uapi/linux/can/bcm.h
  F:    include/uapi/linux/can/gw.h
 +F:    include/uapi/linux/can/isotp.h
  F:    include/uapi/linux/can/raw.h
  F:    net/can/
  
@@@ -6531,9 -6525,9 +6531,9 @@@ F:      Documentation/devicetree/bindings/ne
  F:    Documentation/devicetree/bindings/net/qca,ar803x.yaml
  F:    Documentation/networking/phy.rst
  F:    drivers/net/mdio/
 +F:    drivers/net/mdio/of_mdio.c
  F:    drivers/net/pcs/
  F:    drivers/net/phy/
 -F:    drivers/of/of_mdio.c
  F:    drivers/of/of_net.c
  F:    include/dt-bindings/net/qca-ar803x.h
  F:    include/linux/*mdio*.h
@@@ -8770,8 -8764,7 +8770,8 @@@ F:      include/drm/i915
  F:    include/uapi/drm/i915_drm.h
  
  INTEL ETHERNET DRIVERS
 -M:    Jeff Kirsher <jeffrey.t.kirsher@intel.com>
 +M:    Jesse Brandeburg <jesse.brandeburg@intel.com>
 +M:    Tony Nguyen <anthony.l.nguyen@intel.com>
  L:    intel-wired-lan@lists.osuosl.org (moderated for non-subscribers)
  S:    Supported
  W:    http://www.intel.com/support/feedback.htm
@@@ -10681,14 -10674,14 +10681,14 @@@ L:        linux-input@vger.kernel.or
  S:    Maintained
  F:    drivers/hid/hid-mcp2221.c
  
 -MCP25XXFD SPI-CAN NETWORK DRIVER
 +MCP251XFD SPI-CAN NETWORK DRIVER
  M:    Marc Kleine-Budde <mkl@pengutronix.de>
  M:    Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
  R:    Thomas Kopp <thomas.kopp@microchip.com>
  L:    linux-can@vger.kernel.org
  S:    Maintained
 -F:    Documentation/devicetree/bindings/net/can/microchip,mcp25xxfd.yaml
 -F:    drivers/net/can/spi/mcp25xxfd/
 +F:    Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml
 +F:    drivers/net/can/spi/mcp251xfd/
  
  MCP4018 AND MCP4531 MICROCHIP DIGITAL POTENTIOMETER DRIVERS
  M:    Peter Rosin <peda@axentia.se>
@@@ -12111,7 -12104,6 +12111,7 @@@ NETWORKING [DSA
  M:    Andrew Lunn <andrew@lunn.ch>
  M:    Vivien Didelot <vivien.didelot@gmail.com>
  M:    Florian Fainelli <f.fainelli@gmail.com>
 +M:    Vladimir Oltean <olteanv@gmail.com>
  S:    Maintained
  F:    Documentation/devicetree/bindings/net/dsa/
  F:    drivers/net/dsa/
@@@ -12166,7 -12158,6 +12166,7 @@@ F:   net/ipv6/ipcomp6.
  F:    net/ipv6/xfrm*
  F:    net/key/
  F:    net/xfrm/
 +F:    tools/testing/selftests/net/ipsec.c
  
  NETWORKING [IPv4/IPv6]
  M:    "David S. Miller" <davem@davemloft.net>
@@@ -12550,7 -12541,6 +12550,7 @@@ F:   drivers/net/dsa/ocelot/
  F:    drivers/net/ethernet/mscc/
  F:    include/soc/mscc/ocelot*
  F:    net/dsa/tag_ocelot.c
 +F:    tools/testing/selftests/drivers/net/ocelot/*
  
  OCXL (Open Coherent Accelerator Processor Interface OpenCAPI) DRIVER
  M:    Frederic Barrat <fbarrat@linux.ibm.com>
@@@ -13219,7 -13209,6 +13219,7 @@@ F:   drivers/firmware/pcdp.
  
  PCI DRIVER FOR AARDVARK (Marvell Armada 3700)
  M:    Thomas Petazzoni <thomas.petazzoni@bootlin.com>
 +M:    Pali Rohár <pali@kernel.org>
  L:    linux-pci@vger.kernel.org
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
@@@ -16194,7 -16183,7 +16194,7 @@@ M:   Leon Luo <leonl@leopardimaging.com
  L:    linux-media@vger.kernel.org
  S:    Maintained
  T:    git git://linuxtv.org/media_tree.git
 -F:    Documentation/devicetree/bindings/media/i2c/imx274.txt
 +F:    Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml
  F:    drivers/media/i2c/imx274.c
  
  SONY IMX290 SENSOR DRIVER
@@@ -18321,8 -18310,7 +18321,8 @@@ F:   drivers/gpu/vga/vga_switcheroo.
  F:    include/linux/vga_switcheroo.h
  
  VIA RHINE NETWORK DRIVER
 -S:    Orphan
 +S:    Maintained
 +M:    Kevin Brace <kevinbrace@bracecomputerlab.com>
  F:    drivers/net/ethernet/via/via-rhine.c
  
  VIA SD/MMC CARD CONTROLLER DRIVER
@@@ -18927,10 -18915,10 +18927,10 @@@ T:        git git://git.kernel.org/pub/scm/lin
  F:    arch/x86/mm/
  
  X86 PLATFORM DRIVERS
 -M:    Darren Hart <dvhart@infradead.org>
 -M:    Andy Shevchenko <andy@infradead.org>
 +M:    Hans de Goede <hdegoede@redhat.com>
 +M:    Mark Gross <mgross@linux.intel.com>
  L:    platform-driver-x86@vger.kernel.org
 -S:    Odd Fixes
 +S:    Maintained
  T:    git git://git.infradead.org/linux-platform-drivers-x86.git
  F:    drivers/platform/olpc/
  F:    drivers/platform/x86/
@@@ -212,8 -212,9 +212,8 @@@ struct netdev_hw_addr 
        unsigned char           type;
  #define NETDEV_HW_ADDR_T_LAN          1
  #define NETDEV_HW_ADDR_T_SAN          2
 -#define NETDEV_HW_ADDR_T_SLAVE                3
 -#define NETDEV_HW_ADDR_T_UNICAST      4
 -#define NETDEV_HW_ADDR_T_MULTICAST    5
 +#define NETDEV_HW_ADDR_T_UNICAST      3
 +#define NETDEV_HW_ADDR_T_MULTICAST    4
        bool                    global_use;
        int                     sync_cnt;
        int                     refcount;
@@@ -1276,6 -1277,9 +1276,9 @@@ struct netdev_net_notifier 
   * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p,
   *                     int cmd);
   *    Add, change, delete or get information on an IPv4 tunnel.
+  * struct net_device *(*ndo_get_peer_dev)(struct net_device *dev);
+  *    If a device is paired with a peer device, return the peer instance.
+  *    The caller must be under RCU read context.
   */
  struct net_device_ops {
        int                     (*ndo_init)(struct net_device *dev);
        struct devlink_port *   (*ndo_get_devlink_port)(struct net_device *dev);
        int                     (*ndo_tunnel_ctl)(struct net_device *dev,
                                                  struct ip_tunnel_parm *p, int cmd);
+       struct net_device *     (*ndo_get_peer_dev)(struct net_device *dev);
  };
  
  /**
@@@ -1842,11 -1847,6 +1846,11 @@@ enum netdev_priv_flags 
   *    @udp_tunnel_nic:        UDP tunnel offload state
   *    @xdp_state:             stores info on attached XDP BPF programs
   *
 + *    @nested_level:  Used as a parameter of spin_lock_nested() of
 + *                    dev->addr_list_lock.
 + *    @unlink_list:   As netif_addr_lock() can be called recursively,
 + *                    keep a list of interfaces to be deleted.
 + *
   *    FIXME: cleanup struct net_device such that network protocol info
   *    moves out.
   */
@@@ -1951,7 -1951,6 +1955,7 @@@ struct net_device 
        unsigned short          type;
        unsigned short          hard_header_len;
        unsigned char           min_header_len;
 +      unsigned char           name_assign_type;
  
        unsigned short          needed_headroom;
        unsigned short          needed_tailroom;
        unsigned char           addr_len;
        unsigned char           upper_level;
        unsigned char           lower_level;
 +
        unsigned short          neigh_priv_len;
        unsigned short          dev_id;
        unsigned short          dev_port;
        spinlock_t              addr_list_lock;
 -      unsigned char           name_assign_type;
 -      bool                    uc_promisc;
 +
        struct netdev_hw_addr_list      uc;
        struct netdev_hw_addr_list      mc;
        struct netdev_hw_addr_list      dev_addrs;
  
  #ifdef CONFIG_SYSFS
        struct kset             *queues_kset;
 +#endif
 +#ifdef CONFIG_LOCKDEP
 +      struct list_head        unlink_list;
  #endif
        unsigned int            promiscuity;
        unsigned int            allmulti;
 +      bool                    uc_promisc;
 +#ifdef CONFIG_LOCKDEP
 +      unsigned char           nested_level;
 +#endif
  
  
        /* Protocol-specific pointers */
@@@ -2543,16 -2535,6 +2547,16 @@@ struct pcpu_lstats 
  
  void dev_lstats_read(struct net_device *dev, u64 *packets, u64 *bytes);
  
 +static inline void dev_sw_netstats_rx_add(struct net_device *dev, unsigned int len)
 +{
 +      struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
 +
 +      u64_stats_update_begin(&tstats->syncp);
 +      tstats->rx_bytes += len;
 +      tstats->rx_packets++;
 +      u64_stats_update_end(&tstats->syncp);
 +}
 +
  static inline void dev_lstats_add(struct net_device *dev, unsigned int len)
  {
        struct pcpu_lstats *lstats = this_cpu_ptr(dev->lstats);
@@@ -4304,23 -4286,17 +4308,23 @@@ static inline void netif_tx_disable(str
  
  static inline void netif_addr_lock(struct net_device *dev)
  {
 -      spin_lock(&dev->addr_list_lock);
 -}
 +      unsigned char nest_level = 0;
  
 -static inline void netif_addr_lock_nested(struct net_device *dev)
 -{
 -      spin_lock_nested(&dev->addr_list_lock, dev->lower_level);
 +#ifdef CONFIG_LOCKDEP
 +      nest_level = dev->nested_level;
 +#endif
 +      spin_lock_nested(&dev->addr_list_lock, nest_level);
  }
  
  static inline void netif_addr_lock_bh(struct net_device *dev)
  {
 -      spin_lock_bh(&dev->addr_list_lock);
 +      unsigned char nest_level = 0;
 +
 +#ifdef CONFIG_LOCKDEP
 +      nest_level = dev->nested_level;
 +#endif
 +      local_bh_disable();
 +      spin_lock_nested(&dev->addr_list_lock, nest_level);
  }
  
  static inline void netif_addr_unlock(struct net_device *dev)
@@@ -4505,38 -4481,12 +4509,38 @@@ extern int           dev_rx_weight
  extern int            dev_tx_weight;
  extern int            gro_normal_batch;
  
 +enum {
 +      NESTED_SYNC_IMM_BIT,
 +      NESTED_SYNC_TODO_BIT,
 +};
 +
 +#define __NESTED_SYNC_BIT(bit)        ((u32)1 << (bit))
 +#define __NESTED_SYNC(name)   __NESTED_SYNC_BIT(NESTED_SYNC_ ## name ## _BIT)
 +
 +#define NESTED_SYNC_IMM               __NESTED_SYNC(IMM)
 +#define NESTED_SYNC_TODO      __NESTED_SYNC(TODO)
 +
 +struct netdev_nested_priv {
 +      unsigned char flags;
 +      void *data;
 +};
 +
  bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);
  struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
                                                     struct list_head **iter);
  struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
                                                     struct list_head **iter);
  
 +#ifdef CONFIG_LOCKDEP
 +static LIST_HEAD(net_unlink_list);
 +
 +static inline void net_unlink_todo(struct net_device *dev)
 +{
 +      if (list_empty(&dev->unlink_list))
 +              list_add_tail(&dev->unlink_list, &net_unlink_list);
 +}
 +#endif
 +
  /* iterate through upper list, must be called under RCU read lock */
  #define netdev_for_each_upper_dev_rcu(dev, updev, iter) \
        for (iter = &(dev)->adj_list.upper, \
  
  int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
                                  int (*fn)(struct net_device *upper_dev,
 -                                          void *data),
 -                                void *data);
 +                                          struct netdev_nested_priv *priv),
 +                                struct netdev_nested_priv *priv);
  
  bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
                                  struct net_device *upper_dev);
@@@ -4584,12 -4534,12 +4588,12 @@@ struct net_device *netdev_next_lower_de
                                             struct list_head **iter);
  int netdev_walk_all_lower_dev(struct net_device *dev,
                              int (*fn)(struct net_device *lower_dev,
 -                                      void *data),
 -                            void *data);
 +                                      struct netdev_nested_priv *priv),
 +                            struct netdev_nested_priv *priv);
  int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
                                  int (*fn)(struct net_device *lower_dev,
 -                                          void *data),
 -                                void *data);
 +                                          struct netdev_nested_priv *priv),
 +                                struct netdev_nested_priv *priv);
  
  void *netdev_adjacent_get_private(struct list_head *adj_list);
  void *netdev_lower_get_first_private_rcu(struct net_device *dev);
diff --combined kernel/bpf/verifier.c
@@@ -238,6 -238,8 +238,8 @@@ struct bpf_call_arg_meta 
        u64 msize_max_value;
        int ref_obj_id;
        int func_id;
+       u32 btf_id;
+       u32 ret_btf_id;
  };
  
  struct btf *btf_vmlinux;
@@@ -517,6 -519,7 +519,7 @@@ static const char * const reg_type_str[
        [PTR_TO_XDP_SOCK]       = "xdp_sock",
        [PTR_TO_BTF_ID]         = "ptr_",
        [PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_",
+       [PTR_TO_PERCPU_BTF_ID]  = "percpu_ptr_",
        [PTR_TO_MEM]            = "mem",
        [PTR_TO_MEM_OR_NULL]    = "mem_or_null",
        [PTR_TO_RDONLY_BUF]     = "rdonly_buf",
@@@ -583,7 -586,9 +586,9 @@@ static void print_verifier_state(struc
                        /* reg->off should be 0 for SCALAR_VALUE */
                        verbose(env, "%lld", reg->var_off.value + reg->off);
                } else {
-                       if (t == PTR_TO_BTF_ID || t == PTR_TO_BTF_ID_OR_NULL)
+                       if (t == PTR_TO_BTF_ID ||
+                           t == PTR_TO_BTF_ID_OR_NULL ||
+                           t == PTR_TO_PERCPU_BTF_ID)
                                verbose(env, "%s", kernel_type_name(reg->btf_id));
                        verbose(env, "(id=%d", reg->id);
                        if (reg_type_may_be_refcounted_or_null(t))
@@@ -2204,6 -2209,7 +2209,7 @@@ static bool is_spillable_regtype(enum b
        case PTR_TO_RDONLY_BUF_OR_NULL:
        case PTR_TO_RDWR_BUF:
        case PTR_TO_RDWR_BUF_OR_NULL:
+       case PTR_TO_PERCPU_BTF_ID:
                return true;
        default:
                return false;
@@@ -2221,6 -2227,20 +2227,20 @@@ static bool register_is_const(struct bp
        return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
  }
  
+ static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
+ {
+       return tnum_is_unknown(reg->var_off) &&
+              reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
+              reg->umin_value == 0 && reg->umax_value == U64_MAX &&
+              reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
+              reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
+ }
+ static bool register_is_bounded(struct bpf_reg_state *reg)
+ {
+       return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
+ }
  static bool __is_pointer_value(bool allow_ptr_leaks,
                               const struct bpf_reg_state *reg)
  {
@@@ -2272,7 -2292,7 +2292,7 @@@ static int check_stack_write(struct bpf
        if (value_regno >= 0)
                reg = &cur->regs[value_regno];
  
-       if (reg && size == BPF_REG_SIZE && register_is_const(reg) &&
+       if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) &&
            !register_is_null(reg) && env->bpf_capable) {
                if (dst_reg != BPF_REG_FP) {
                        /* The backtracking logic can only recognize explicit
@@@ -2667,7 -2687,7 +2687,7 @@@ static bool may_access_direct_pkt_data(
        case BPF_PROG_TYPE_CGROUP_SKB:
                if (t == BPF_WRITE)
                        return false;
-               /* fallthrough */
+               fallthrough;
  
        /* Program types with direct read + write access go here! */
        case BPF_PROG_TYPE_SCHED_CLS:
@@@ -3978,6 -3998,7 +3998,7 @@@ static const struct bpf_reg_types sock_
        },
  };
  
+ #ifdef CONFIG_NET
  static const struct bpf_reg_types btf_id_sock_common_types = {
        .types = {
                PTR_TO_SOCK_COMMON,
        },
        .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
  };
+ #endif
  
  static const struct bpf_reg_types mem_types = {
        .types = {
@@@ -4017,6 -4039,7 +4039,7 @@@ static const struct bpf_reg_types alloc
  static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
  static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
  static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
+ static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
  
  static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
        [ARG_PTR_TO_MAP_KEY]            = &map_key_value_types,
        [ARG_PTR_TO_CTX]                = &context_types,
        [ARG_PTR_TO_CTX_OR_NULL]        = &context_types,
        [ARG_PTR_TO_SOCK_COMMON]        = &sock_types,
+ #ifdef CONFIG_NET
        [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
+ #endif
        [ARG_PTR_TO_SOCKET]             = &fullsock_types,
        [ARG_PTR_TO_SOCKET_OR_NULL]     = &fullsock_types,
        [ARG_PTR_TO_BTF_ID]             = &btf_ptr_types,
        [ARG_PTR_TO_ALLOC_MEM_OR_NULL]  = &alloc_mem_types,
        [ARG_PTR_TO_INT]                = &int_ptr_types,
        [ARG_PTR_TO_LONG]               = &int_ptr_types,
+       [ARG_PTR_TO_PERCPU_BTF_ID]      = &percpu_btf_ptr_types,
  };
  
  static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
@@@ -4205,6 -4231,12 +4231,12 @@@ skip_type_check
                err = check_helper_mem_access(env, regno,
                                              meta->map_ptr->value_size, false,
                                              meta);
+       } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
+               if (!reg->btf_id) {
+                       verbose(env, "Helper has invalid btf_id in R%d\n", regno);
+                       return -EACCES;
+               }
+               meta->ret_btf_id = reg->btf_id;
        } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
                if (meta->func_id == BPF_FUNC_spin_lock) {
                        if (process_spin_lock(env, regno, true))
@@@ -5114,6 -5146,35 +5146,35 @@@ static int check_helper_call(struct bpf
                regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
                regs[BPF_REG_0].id = ++env->id_gen;
                regs[BPF_REG_0].mem_size = meta.mem_size;
+       } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL ||
+                  fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) {
+               const struct btf_type *t;
+               mark_reg_known_zero(env, regs, BPF_REG_0);
+               t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL);
+               if (!btf_type_is_struct(t)) {
+                       u32 tsize;
+                       const struct btf_type *ret;
+                       const char *tname;
+                       /* resolve the type size of ksym. */
+                       ret = btf_resolve_size(btf_vmlinux, t, &tsize);
+                       if (IS_ERR(ret)) {
+                               tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+                               verbose(env, "unable to resolve the size of type '%s': %ld\n",
+                                       tname, PTR_ERR(ret));
+                               return -EINVAL;
+                       }
+                       regs[BPF_REG_0].type =
+                               fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
+                               PTR_TO_MEM : PTR_TO_MEM_OR_NULL;
+                       regs[BPF_REG_0].mem_size = tsize;
+               } else {
+                       regs[BPF_REG_0].type =
+                               fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
+                               PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL;
+                       regs[BPF_REG_0].btf_id = meta.ret_btf_id;
+               }
        } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
                int ret_btf_id;
  
@@@ -5432,7 -5493,7 +5493,7 @@@ static int adjust_ptr_min_max_vals(stru
                /* smin_val represents the known value */
                if (known && smin_val == 0 && opcode == BPF_ADD)
                        break;
-               /* fall-through */
+               fallthrough;
        case PTR_TO_PACKET_END:
        case PTR_TO_SOCKET:
        case PTR_TO_SOCKET_OR_NULL:
@@@ -5880,8 -5941,8 +5941,8 @@@ static void scalar32_min_max_or(struct 
        bool src_known = tnum_subreg_is_const(src_reg->var_off);
        bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
        struct tnum var32_off = tnum_subreg(dst_reg->var_off);
 -      s32 smin_val = src_reg->smin_value;
 -      u32 umin_val = src_reg->umin_value;
 +      s32 smin_val = src_reg->s32_min_value;
 +      u32 umin_val = src_reg->u32_min_value;
  
        /* Assuming scalar64_min_max_or will be called so it is safe
         * to skip updating register for known case.
                /* ORing two positives gives a positive, so safe to
                 * cast result into s64.
                 */
 -              dst_reg->s32_min_value = dst_reg->umin_value;
 -              dst_reg->s32_max_value = dst_reg->umax_value;
 +              dst_reg->s32_min_value = dst_reg->u32_min_value;
 +              dst_reg->s32_max_value = dst_reg->u32_max_value;
        }
  }
  
@@@ -6389,6 -6450,11 +6450,11 @@@ static int adjust_reg_min_max_vals(stru
        src_reg = NULL;
        if (dst_reg->type != SCALAR_VALUE)
                ptr_reg = dst_reg;
+       else
+               /* Make sure ID is cleared otherwise dst_reg min/max could be
+                * incorrectly propagated into other registers by find_equal_scalars()
+                */
+               dst_reg->id = 0;
        if (BPF_SRC(insn->code) == BPF_X) {
                src_reg = &regs[insn->src_reg];
                if (src_reg->type != SCALAR_VALUE) {
@@@ -6522,6 -6588,12 +6588,12 @@@ static int check_alu_op(struct bpf_veri
                                /* case: R1 = R2
                                 * copy register state to dest reg
                                 */
+                               if (src_reg->type == SCALAR_VALUE && !src_reg->id)
+                                       /* Assign src and dst registers the same ID
+                                        * that will be used by find_equal_scalars()
+                                        * to propagate min/max range.
+                                        */
+                                       src_reg->id = ++env->id_gen;
                                *dst_reg = *src_reg;
                                dst_reg->live |= REG_LIVE_WRITTEN;
                                dst_reg->subreg_def = DEF_NOT_SUBREG;
                                        return -EACCES;
                                } else if (src_reg->type == SCALAR_VALUE) {
                                        *dst_reg = *src_reg;
+                                       /* Make sure ID is cleared otherwise
+                                        * dst_reg min/max could be incorrectly
+                                        * propagated into src_reg by find_equal_scalars()
+                                        */
+                                       dst_reg->id = 0;
                                        dst_reg->live |= REG_LIVE_WRITTEN;
                                        dst_reg->subreg_def = env->insn_idx + 1;
                                } else {
@@@ -7322,6 -7399,30 +7399,30 @@@ static bool try_match_pkt_pointers(cons
        return true;
  }
  
+ static void find_equal_scalars(struct bpf_verifier_state *vstate,
+                              struct bpf_reg_state *known_reg)
+ {
+       struct bpf_func_state *state;
+       struct bpf_reg_state *reg;
+       int i, j;
+       for (i = 0; i <= vstate->curframe; i++) {
+               state = vstate->frame[i];
+               for (j = 0; j < MAX_BPF_REG; j++) {
+                       reg = &state->regs[j];
+                       if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
+                               *reg = *known_reg;
+               }
+               bpf_for_each_spilled_reg(j, state, reg) {
+                       if (!reg)
+                               continue;
+                       if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
+                               *reg = *known_reg;
+               }
+       }
+ }
  static int check_cond_jmp_op(struct bpf_verifier_env *env,
                             struct bpf_insn *insn, int *insn_idx)
  {
                                reg_combine_min_max(&other_branch_regs[insn->src_reg],
                                                    &other_branch_regs[insn->dst_reg],
                                                    src_reg, dst_reg, opcode);
+                       if (src_reg->id) {
+                               find_equal_scalars(this_branch, src_reg);
+                               find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
+                       }
                }
        } else if (dst_reg->type == SCALAR_VALUE) {
                reg_set_min_max(&other_branch_regs[insn->dst_reg],
                                        opcode, is_jmp32);
        }
  
+       if (dst_reg->type == SCALAR_VALUE && dst_reg->id) {
+               find_equal_scalars(this_branch, dst_reg);
+               find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
+       }
        /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
         * NOTE: these optimizations below are related with pointer comparison
         *       which will never be JMP32.
@@@ -7488,6 -7599,7 +7599,7 @@@ static int check_ld_imm(struct bpf_veri
  {
        struct bpf_insn_aux_data *aux = cur_aux(env);
        struct bpf_reg_state *regs = cur_regs(env);
+       struct bpf_reg_state *dst_reg;
        struct bpf_map *map;
        int err;
  
        if (err)
                return err;
  
+       dst_reg = &regs[insn->dst_reg];
        if (insn->src_reg == 0) {
                u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
  
-               regs[insn->dst_reg].type = SCALAR_VALUE;
+               dst_reg->type = SCALAR_VALUE;
                __mark_reg_known(&regs[insn->dst_reg], imm);
                return 0;
        }
  
+       if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
+               mark_reg_known_zero(env, regs, insn->dst_reg);
+               dst_reg->type = aux->btf_var.reg_type;
+               switch (dst_reg->type) {
+               case PTR_TO_MEM:
+                       dst_reg->mem_size = aux->btf_var.mem_size;
+                       break;
+               case PTR_TO_BTF_ID:
+               case PTR_TO_PERCPU_BTF_ID:
+                       dst_reg->btf_id = aux->btf_var.btf_id;
+                       break;
+               default:
+                       verbose(env, "bpf verifier is misconfigured\n");
+                       return -EFAULT;
+               }
+               return 0;
+       }
        map = env->used_maps[aux->map_index];
        mark_reg_known_zero(env, regs, insn->dst_reg);
-       regs[insn->dst_reg].map_ptr = map;
+       dst_reg->map_ptr = map;
  
        if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
-               regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
-               regs[insn->dst_reg].off = aux->map_off;
+               dst_reg->type = PTR_TO_MAP_VALUE;
+               dst_reg->off = aux->map_off;
                if (map_value_has_spin_lock(map))
-                       regs[insn->dst_reg].id = ++env->id_gen;
+                       dst_reg->id = ++env->id_gen;
        } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
-               regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
+               dst_reg->type = CONST_PTR_TO_MAP;
        } else {
                verbose(env, "bpf verifier is misconfigured\n");
                return -EINVAL;
@@@ -9424,6 -9556,92 +9556,92 @@@ process_bpf_exit
        return 0;
  }
  
+ /* replace pseudo btf_id with kernel symbol address */
+ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
+                              struct bpf_insn *insn,
+                              struct bpf_insn_aux_data *aux)
+ {
+       u32 datasec_id, type, id = insn->imm;
+       const struct btf_var_secinfo *vsi;
+       const struct btf_type *datasec;
+       const struct btf_type *t;
+       const char *sym_name;
+       bool percpu = false;
+       u64 addr;
+       int i;
+       if (!btf_vmlinux) {
+               verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
+               return -EINVAL;
+       }
+       if (insn[1].imm != 0) {
+               verbose(env, "reserved field (insn[1].imm) is used in pseudo_btf_id ldimm64 insn.\n");
+               return -EINVAL;
+       }
+       t = btf_type_by_id(btf_vmlinux, id);
+       if (!t) {
+               verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
+               return -ENOENT;
+       }
+       if (!btf_type_is_var(t)) {
+               verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n",
+                       id);
+               return -EINVAL;
+       }
+       sym_name = btf_name_by_offset(btf_vmlinux, t->name_off);
+       addr = kallsyms_lookup_name(sym_name);
+       if (!addr) {
+               verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
+                       sym_name);
+               return -ENOENT;
+       }
+       datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu",
+                                          BTF_KIND_DATASEC);
+       if (datasec_id > 0) {
+               datasec = btf_type_by_id(btf_vmlinux, datasec_id);
+               for_each_vsi(i, datasec, vsi) {
+                       if (vsi->type == id) {
+                               percpu = true;
+                               break;
+                       }
+               }
+       }
+       insn[0].imm = (u32)addr;
+       insn[1].imm = addr >> 32;
+       type = t->type;
+       t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
+       if (percpu) {
+               aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
+               aux->btf_var.btf_id = type;
+       } else if (!btf_type_is_struct(t)) {
+               const struct btf_type *ret;
+               const char *tname;
+               u32 tsize;
+               /* resolve the type size of ksym. */
+               ret = btf_resolve_size(btf_vmlinux, t, &tsize);
+               if (IS_ERR(ret)) {
+                       tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+                       verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
+                               tname, PTR_ERR(ret));
+                       return -EINVAL;
+               }
+               aux->btf_var.reg_type = PTR_TO_MEM;
+               aux->btf_var.mem_size = tsize;
+       } else {
+               aux->btf_var.reg_type = PTR_TO_BTF_ID;
+               aux->btf_var.btf_id = type;
+       }
+       return 0;
+ }
  static int check_map_prealloc(struct bpf_map *map)
  {
        return (map->map_type != BPF_MAP_TYPE_HASH &&
@@@ -9534,10 -9752,14 +9752,14 @@@ static bool bpf_map_is_cgroup_storage(s
                map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
  }
  
- /* look for pseudo eBPF instructions that access map FDs and
-  * replace them with actual map pointers
+ /* find and rewrite pseudo imm in ld_imm64 instructions:
+  *
+  * 1. if it accesses map FD, replace it with actual map pointer.
+  * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
+  *
+  * NOTE: btf_vmlinux is required for converting pseudo btf_id.
   */
- static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
+ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
  {
        struct bpf_insn *insn = env->prog->insnsi;
        int insn_cnt = env->prog->len;
                                /* valid generic load 64-bit imm */
                                goto next_insn;
  
+                       if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
+                               aux = &env->insn_aux_data[i];
+                               err = check_pseudo_btf_id(env, insn, aux);
+                               if (err)
+                                       return err;
+                               goto next_insn;
+                       }
                        /* In final convert_pseudo_ld_imm64() step, this is
                         * converted into regular 64-bit imm load insn.
                         */
@@@ -10819,7 -11049,9 +11049,9 @@@ static int fixup_bpf_calls(struct bpf_v
                        if (insn->imm == BPF_FUNC_map_lookup_elem &&
                            ops->map_gen_lookup) {
                                cnt = ops->map_gen_lookup(map_ptr, insn_buf);
-                               if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
+                               if (cnt == -EOPNOTSUPP)
+                                       goto patch_map_ops_generic;
+                               if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
                                        verbose(env, "bpf verifier is misconfigured\n");
                                        return -EINVAL;
                                }
                                     (int (*)(struct bpf_map *map, void *value))NULL));
                        BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
                                     (int (*)(struct bpf_map *map, void *value))NULL));
+ patch_map_ops_generic:
                        switch (insn->imm) {
                        case BPF_FUNC_map_lookup_elem:
                                insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
@@@ -11633,10 -11865,6 +11865,6 @@@ int bpf_check(struct bpf_prog **prog, u
        if (is_priv)
                env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
  
-       ret = replace_map_fd_with_map_ptr(env);
-       if (ret < 0)
-               goto skip_full_check;
        if (bpf_prog_is_dev_bound(env->prog->aux)) {
                ret = bpf_prog_offload_verifier_prep(env->prog);
                if (ret)
        if (ret)
                goto skip_full_check;
  
+       ret = resolve_pseudo_ldimm64(env);
+       if (ret < 0)
+               goto skip_full_check;
        ret = check_cfg(env);
        if (ret < 0)
                goto skip_full_check;
diff --combined net/core/dev.c
@@@ -4930,7 -4930,7 +4930,7 @@@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook
  
  static inline struct sk_buff *
  sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
-                  struct net_device *orig_dev)
+                  struct net_device *orig_dev, bool *another)
  {
  #ifdef CONFIG_NET_CLS_ACT
        struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
                 * redirecting to another netdev
                 */
                __skb_push(skb, skb->mac_len);
-               skb_do_redirect(skb);
+               if (skb_do_redirect(skb) == -EAGAIN) {
+                       __skb_pull(skb, skb->mac_len);
+                       *another = true;
+                       break;
+               }
                return NULL;
        case TC_ACT_CONSUMED:
                return NULL;
@@@ -5163,7 -5167,12 +5167,12 @@@ another_round
  skip_taps:
  #ifdef CONFIG_NET_INGRESS
        if (static_branch_unlikely(&ingress_needed_key)) {
-               skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev);
+               bool another = false;
+               skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev,
+                                        &another);
+               if (another)
+                       goto another_round;
                if (!skb)
                        goto out;
  
@@@ -6874,10 -6883,9 +6883,10 @@@ static struct netdev_adjacent *__netdev
        return NULL;
  }
  
 -static int ____netdev_has_upper_dev(struct net_device *upper_dev, void *data)
 +static int ____netdev_has_upper_dev(struct net_device *upper_dev,
 +                                  struct netdev_nested_priv *priv)
  {
 -      struct net_device *dev = data;
 +      struct net_device *dev = (struct net_device *)priv->data;
  
        return upper_dev == dev;
  }
  bool netdev_has_upper_dev(struct net_device *dev,
                          struct net_device *upper_dev)
  {
 +      struct netdev_nested_priv priv = {
 +              .data = (void *)upper_dev,
 +      };
 +
        ASSERT_RTNL();
  
        return netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
 -                                           upper_dev);
 +                                           &priv);
  }
  EXPORT_SYMBOL(netdev_has_upper_dev);
  
  bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
                                  struct net_device *upper_dev)
  {
 +      struct netdev_nested_priv priv = {
 +              .data = (void *)upper_dev,
 +      };
 +
        return !!netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
 -                                             upper_dev);
 +                                             &priv);
  }
  EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu);
  
@@@ -7068,8 -7068,8 +7077,8 @@@ static struct net_device *netdev_next_u
  
  static int __netdev_walk_all_upper_dev(struct net_device *dev,
                                       int (*fn)(struct net_device *dev,
 -                                               void *data),
 -                                     void *data)
 +                                       struct netdev_nested_priv *priv),
 +                                     struct netdev_nested_priv *priv)
  {
        struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
        struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
  
        while (1) {
                if (now != dev) {
 -                      ret = fn(now, data);
 +                      ret = fn(now, priv);
                        if (ret)
                                return ret;
                }
  
  int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
                                  int (*fn)(struct net_device *dev,
 -                                          void *data),
 -                                void *data)
 +                                          struct netdev_nested_priv *priv),
 +                                struct netdev_nested_priv *priv)
  {
        struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
        struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
  
        while (1) {
                if (now != dev) {
 -                      ret = fn(now, data);
 +                      ret = fn(now, priv);
                        if (ret)
                                return ret;
                }
@@@ -7165,15 -7165,10 +7174,15 @@@ EXPORT_SYMBOL_GPL(netdev_walk_all_upper
  static bool __netdev_has_upper_dev(struct net_device *dev,
                                   struct net_device *upper_dev)
  {
 +      struct netdev_nested_priv priv = {
 +              .flags = 0,
 +              .data = (void *)upper_dev,
 +      };
 +
        ASSERT_RTNL();
  
        return __netdev_walk_all_upper_dev(dev, ____netdev_has_upper_dev,
 -                                         upper_dev);
 +                                         &priv);
  }
  
  /**
@@@ -7291,8 -7286,8 +7300,8 @@@ static struct net_device *__netdev_next
  
  int netdev_walk_all_lower_dev(struct net_device *dev,
                              int (*fn)(struct net_device *dev,
 -                                      void *data),
 -                            void *data)
 +                                      struct netdev_nested_priv *priv),
 +                            struct netdev_nested_priv *priv)
  {
        struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
        struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
  
        while (1) {
                if (now != dev) {
 -                      ret = fn(now, data);
 +                      ret = fn(now, priv);
                        if (ret)
                                return ret;
                }
@@@ -7338,8 -7333,8 +7347,8 @@@ EXPORT_SYMBOL_GPL(netdev_walk_all_lower
  
  static int __netdev_walk_all_lower_dev(struct net_device *dev,
                                       int (*fn)(struct net_device *dev,
 -                                               void *data),
 -                                     void *data)
 +                                       struct netdev_nested_priv *priv),
 +                                     struct netdev_nested_priv *priv)
  {
        struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
        struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
  
        while (1) {
                if (now != dev) {
 -                      ret = fn(now, data);
 +                      ret = fn(now, priv);
                        if (ret)
                                return ret;
                }
@@@ -7440,34 -7435,22 +7449,34 @@@ static u8 __netdev_lower_depth(struct n
        return max_depth;
  }
  
 -static int __netdev_update_upper_level(struct net_device *dev, void *data)
 +static int __netdev_update_upper_level(struct net_device *dev,
 +                                     struct netdev_nested_priv *__unused)
  {
        dev->upper_level = __netdev_upper_depth(dev) + 1;
        return 0;
  }
  
 -static int __netdev_update_lower_level(struct net_device *dev, void *data)
 +static int __netdev_update_lower_level(struct net_device *dev,
 +                                     struct netdev_nested_priv *priv)
  {
        dev->lower_level = __netdev_lower_depth(dev) + 1;
 +
 +#ifdef CONFIG_LOCKDEP
 +      if (!priv)
 +              return 0;
 +
 +      if (priv->flags & NESTED_SYNC_IMM)
 +              dev->nested_level = dev->lower_level - 1;
 +      if (priv->flags & NESTED_SYNC_TODO)
 +              net_unlink_todo(dev);
 +#endif
        return 0;
  }
  
  int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
                                  int (*fn)(struct net_device *dev,
 -                                          void *data),
 -                                void *data)
 +                                          struct netdev_nested_priv *priv),
 +                                struct netdev_nested_priv *priv)
  {
        struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
        struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
  
        while (1) {
                if (now != dev) {
 -                      ret = fn(now, data);
 +                      ret = fn(now, priv);
                        if (ret)
                                return ret;
                }
@@@ -7738,7 -7721,6 +7747,7 @@@ static void __netdev_adjacent_dev_unlin
  static int __netdev_upper_dev_link(struct net_device *dev,
                                   struct net_device *upper_dev, bool master,
                                   void *upper_priv, void *upper_info,
 +                                 struct netdev_nested_priv *priv,
                                   struct netlink_ext_ack *extack)
  {
        struct netdev_notifier_changeupper_info changeupper_info = {
        __netdev_update_upper_level(dev, NULL);
        __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
  
 -      __netdev_update_lower_level(upper_dev, NULL);
 +      __netdev_update_lower_level(upper_dev, priv);
        __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
 -                                  NULL);
 +                                  priv);
  
        return 0;
  
@@@ -7822,13 -7804,8 +7831,13 @@@ int netdev_upper_dev_link(struct net_de
                          struct net_device *upper_dev,
                          struct netlink_ext_ack *extack)
  {
 +      struct netdev_nested_priv priv = {
 +              .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO,
 +              .data = NULL,
 +      };
 +
        return __netdev_upper_dev_link(dev, upper_dev, false,
 -                                     NULL, NULL, extack);
 +                                     NULL, NULL, &priv, extack);
  }
  EXPORT_SYMBOL(netdev_upper_dev_link);
  
@@@ -7851,19 -7828,21 +7860,19 @@@ int netdev_master_upper_dev_link(struc
                                 void *upper_priv, void *upper_info,
                                 struct netlink_ext_ack *extack)
  {
 +      struct netdev_nested_priv priv = {
 +              .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO,
 +              .data = NULL,
 +      };
 +
        return __netdev_upper_dev_link(dev, upper_dev, true,
 -                                     upper_priv, upper_info, extack);
 +                                     upper_priv, upper_info, &priv, extack);
  }
  EXPORT_SYMBOL(netdev_master_upper_dev_link);
  
 -/**
 - * netdev_upper_dev_unlink - Removes a link to upper device
 - * @dev: device
 - * @upper_dev: new upper device
 - *
 - * Removes a link to device which is upper to this one. The caller must hold
 - * the RTNL lock.
 - */
 -void netdev_upper_dev_unlink(struct net_device *dev,
 -                           struct net_device *upper_dev)
 +static void __netdev_upper_dev_unlink(struct net_device *dev,
 +                                    struct net_device *upper_dev,
 +                                    struct netdev_nested_priv *priv)
  {
        struct netdev_notifier_changeupper_info changeupper_info = {
                .info = {
        __netdev_update_upper_level(dev, NULL);
        __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
  
 -      __netdev_update_lower_level(upper_dev, NULL);
 +      __netdev_update_lower_level(upper_dev, priv);
        __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
 -                                  NULL);
 +                                  priv);
 +}
 +
 +/**
 + * netdev_upper_dev_unlink - Removes a link to upper device
 + * @dev: device
 + * @upper_dev: new upper device
 + *
 + * Removes a link to device which is upper to this one. The caller must hold
 + * the RTNL lock.
 + */
 +void netdev_upper_dev_unlink(struct net_device *dev,
 +                           struct net_device *upper_dev)
 +{
 +      struct netdev_nested_priv priv = {
 +              .flags = NESTED_SYNC_TODO,
 +              .data = NULL,
 +      };
 +
 +      __netdev_upper_dev_unlink(dev, upper_dev, &priv);
  }
  EXPORT_SYMBOL(netdev_upper_dev_unlink);
  
@@@ -7945,10 -7905,6 +7954,10 @@@ int netdev_adjacent_change_prepare(stru
                                   struct net_device *dev,
                                   struct netlink_ext_ack *extack)
  {
 +      struct netdev_nested_priv priv = {
 +              .flags = 0,
 +              .data = NULL,
 +      };
        int err;
  
        if (!new_dev)
  
        if (old_dev && new_dev != old_dev)
                netdev_adjacent_dev_disable(dev, old_dev);
 -
 -      err = netdev_upper_dev_link(new_dev, dev, extack);
 +      err = __netdev_upper_dev_link(new_dev, dev, false, NULL, NULL, &priv,
 +                                    extack);
        if (err) {
                if (old_dev && new_dev != old_dev)
                        netdev_adjacent_dev_enable(dev, old_dev);
@@@ -7972,11 -7928,6 +7981,11 @@@ void netdev_adjacent_change_commit(stru
                                   struct net_device *new_dev,
                                   struct net_device *dev)
  {
 +      struct netdev_nested_priv priv = {
 +              .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO,
 +              .data = NULL,
 +      };
 +
        if (!new_dev || !old_dev)
                return;
  
                return;
  
        netdev_adjacent_dev_enable(dev, old_dev);
 -      netdev_upper_dev_unlink(old_dev, dev);
 +      __netdev_upper_dev_unlink(old_dev, dev, &priv);
  }
  EXPORT_SYMBOL(netdev_adjacent_change_commit);
  
@@@ -7992,18 -7943,13 +8001,18 @@@ void netdev_adjacent_change_abort(struc
                                  struct net_device *new_dev,
                                  struct net_device *dev)
  {
 +      struct netdev_nested_priv priv = {
 +              .flags = 0,
 +              .data = NULL,
 +      };
 +
        if (!new_dev)
                return;
  
        if (old_dev && new_dev != old_dev)
                netdev_adjacent_dev_enable(dev, old_dev);
  
 -      netdev_upper_dev_unlink(new_dev, dev);
 +      __netdev_upper_dev_unlink(new_dev, dev, &priv);
  }
  EXPORT_SYMBOL(netdev_adjacent_change_abort);
  
@@@ -10195,19 -10141,6 +10204,19 @@@ static void netdev_wait_allrefs(struct 
  void netdev_run_todo(void)
  {
        struct list_head list;
 +#ifdef CONFIG_LOCKDEP
 +      struct list_head unlink_list;
 +
 +      list_replace_init(&net_unlink_list, &unlink_list);
 +
 +      while (!list_empty(&unlink_list)) {
 +              struct net_device *dev = list_first_entry(&unlink_list,
 +                                                        struct net_device,
 +                                                        unlink_list);
 +              list_del(&dev->unlink_list);
 +              dev->nested_level = dev->lower_level - 1;
 +      }
 +#endif
  
        /* Snapshot list, allow later requests */
        list_replace_init(&net_todo_list, &list);
@@@ -10420,10 -10353,6 +10429,10 @@@ struct net_device *alloc_netdev_mqs(in
        dev->gso_max_segs = GSO_MAX_SEGS;
        dev->upper_level = 1;
        dev->lower_level = 1;
 +#ifdef CONFIG_LOCKDEP
 +      dev->nested_level = 0;
 +      INIT_LIST_HEAD(&dev->unlink_list);
 +#endif
  
        INIT_LIST_HEAD(&dev->napi_list);
        INIT_LIST_HEAD(&dev->unreg_list);
diff --combined net/core/filter.c
@@@ -76,6 -76,7 +76,7 @@@
  #include <net/bpf_sk_storage.h>
  #include <net/transp_v6.h>
  #include <linux/btf_ids.h>
+ #include <net/tls.h>
  
  static const struct bpf_func_proto *
  bpf_sk_base_func_proto(enum bpf_func_id func_id);
@@@ -2379,8 -2380,9 +2380,9 @@@ out
  
  /* Internal, non-exposed redirect flags. */
  enum {
-       BPF_F_NEIGH = (1ULL << 1),
- #define BPF_F_REDIRECT_INTERNAL       (BPF_F_NEIGH)
+       BPF_F_NEIGH     = (1ULL << 1),
+       BPF_F_PEER      = (1ULL << 2),
+ #define BPF_F_REDIRECT_INTERNAL       (BPF_F_NEIGH | BPF_F_PEER)
  };
  
  BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
@@@ -2429,19 -2431,35 +2431,35 @@@ EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_
  int skb_do_redirect(struct sk_buff *skb)
  {
        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+       struct net *net = dev_net(skb->dev);
        struct net_device *dev;
        u32 flags = ri->flags;
  
-       dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->tgt_index);
+       dev = dev_get_by_index_rcu(net, ri->tgt_index);
        ri->tgt_index = 0;
-       if (unlikely(!dev)) {
-               kfree_skb(skb);
-               return -EINVAL;
+       ri->flags = 0;
+       if (unlikely(!dev))
+               goto out_drop;
+       if (flags & BPF_F_PEER) {
+               const struct net_device_ops *ops = dev->netdev_ops;
+               if (unlikely(!ops->ndo_get_peer_dev ||
+                            !skb_at_tc_ingress(skb)))
+                       goto out_drop;
+               dev = ops->ndo_get_peer_dev(dev);
+               if (unlikely(!dev ||
+                            !is_skb_forwardable(dev, skb) ||
+                            net_eq(net, dev_net(dev))))
+                       goto out_drop;
+               skb->dev = dev;
+               return -EAGAIN;
        }
        return flags & BPF_F_NEIGH ?
               __bpf_redirect_neigh(skb, dev) :
               __bpf_redirect(skb, dev, flags);
+ out_drop:
+       kfree_skb(skb);
+       return -EINVAL;
  }
  
  BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
@@@ -2465,6 -2483,27 +2483,27 @@@ static const struct bpf_func_proto bpf_
        .arg2_type      = ARG_ANYTHING,
  };
  
+ BPF_CALL_2(bpf_redirect_peer, u32, ifindex, u64, flags)
+ {
+       struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+       if (unlikely(flags))
+               return TC_ACT_SHOT;
+       ri->flags = BPF_F_PEER;
+       ri->tgt_index = ifindex;
+       return TC_ACT_REDIRECT;
+ }
+ static const struct bpf_func_proto bpf_redirect_peer_proto = {
+       .func           = bpf_redirect_peer,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_ANYTHING,
+       .arg2_type      = ARG_ANYTHING,
+ };
  BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags)
  {
        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
@@@ -3479,6 -3518,48 +3518,48 @@@ static u32 __bpf_skb_max_len(const stru
                          SKB_MAX_ALLOC;
  }
  
+ BPF_CALL_4(sk_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
+          u32, mode, u64, flags)
+ {
+       u32 len_diff_abs = abs(len_diff);
+       bool shrink = len_diff < 0;
+       int ret = 0;
+       if (unlikely(flags || mode))
+               return -EINVAL;
+       if (unlikely(len_diff_abs > 0xfffU))
+               return -EFAULT;
+       if (!shrink) {
+               ret = skb_cow(skb, len_diff);
+               if (unlikely(ret < 0))
+                       return ret;
+               __skb_push(skb, len_diff_abs);
+               memset(skb->data, 0, len_diff_abs);
+       } else {
+               if (unlikely(!pskb_may_pull(skb, len_diff_abs)))
+                       return -ENOMEM;
+               __skb_pull(skb, len_diff_abs);
+       }
+       bpf_compute_data_end_sk_skb(skb);
+       if (tls_sw_has_ctx_rx(skb->sk)) {
+               struct strp_msg *rxm = strp_msg(skb);
+               rxm->full_len += len_diff;
+       }
+       return ret;
+ }
+ static const struct bpf_func_proto sk_skb_adjust_room_proto = {
+       .func           = sk_skb_adjust_room,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_ANYTHING,
+       .arg4_type      = ARG_ANYTHING,
+ };
  BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
           u32, mode, u64, flags)
  {
@@@ -4784,6 -4865,10 +4865,10 @@@ static int _bpf_setsockopt(struct sock 
                                else
                                        icsk->icsk_user_timeout = val;
                                break;
+                       case TCP_NOTSENT_LOWAT:
+                               tp->notsent_lowat = val;
+                               sk->sk_write_space(sk);
+                               break;
                        default:
                                ret = -EINVAL;
                        }
@@@ -5149,7 -5234,6 +5234,6 @@@ static int bpf_fib_set_fwd_params(struc
        memcpy(params->smac, dev->dev_addr, ETH_ALEN);
        params->h_vlan_TCI = 0;
        params->h_vlan_proto = 0;
-       params->ifindex = dev->ifindex;
  
        return 0;
  }
@@@ -5246,6 -5330,7 +5330,7 @@@ static int bpf_ipv4_fib_lookup(struct n
        dev = nhc->nhc_dev;
  
        params->rt_metric = res.fi->fib_priority;
+       params->ifindex = dev->ifindex;
  
        /* xdp and cls_bpf programs are run in RCU-bh so
         * rcu_read_lock_bh is not needed here
@@@ -5371,6 -5456,7 +5456,7 @@@ static int bpf_ipv6_fib_lookup(struct n
  
        dev = res.nh->fib_nh_dev;
        params->rt_metric = res.f6i->fib6_metric;
+       params->ifindex = dev->ifindex;
  
        /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
         * not needed here.
@@@ -6745,6 -6831,7 +6831,7 @@@ bool bpf_helper_changes_pkt_data(void *
            func == bpf_skb_change_tail ||
            func == sk_skb_change_tail ||
            func == bpf_skb_adjust_room ||
+           func == sk_skb_adjust_room ||
            func == bpf_skb_pull_data ||
            func == sk_skb_pull_data ||
            func == bpf_clone_redirect ||
@@@ -7005,6 -7092,8 +7092,8 @@@ tc_cls_act_func_proto(enum bpf_func_id 
                return &bpf_redirect_proto;
        case BPF_FUNC_redirect_neigh:
                return &bpf_redirect_neigh_proto;
+       case BPF_FUNC_redirect_peer:
+               return &bpf_redirect_peer_proto;
        case BPF_FUNC_get_route_realm:
                return &bpf_get_route_realm_proto;
        case BPF_FUNC_get_hash_recalc:
@@@ -7218,6 -7307,8 +7307,8 @@@ sk_skb_func_proto(enum bpf_func_id func
                return &sk_skb_change_tail_proto;
        case BPF_FUNC_skb_change_head:
                return &sk_skb_change_head_proto;
+       case BPF_FUNC_skb_adjust_room:
+               return &sk_skb_adjust_room_proto;
        case BPF_FUNC_get_socket_cookie:
                return &bpf_get_socket_cookie_proto;
        case BPF_FUNC_get_socket_uid:
@@@ -10203,12 -10294,6 +10294,12 @@@ const struct bpf_func_proto bpf_skc_to_
  
  BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk)
  {
 +      /* BTF types for tcp_timewait_sock and inet_timewait_sock are not
 +       * generated if CONFIG_INET=n. Trigger an explicit generation here.
 +       */
 +      BTF_TYPE_EMIT(struct inet_timewait_sock);
 +      BTF_TYPE_EMIT(struct tcp_timewait_sock);
 +
  #ifdef CONFIG_INET
        if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT)
                return (unsigned long)sk;
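A brief note on BTF_TYPE_EMIT() as used in the comment above: it forces the compiler to reference the type so that BTF generation emits it even when nothing else in the object uses it. Its definition is roughly the following (recalled from include/linux/btf.h, so treat it as an approximation):

/* casting 0 to a pointer of the given type and discarding the result is
 * enough to make the type appear in DWARF and hence in generated BTF
 */
#define BTF_TYPE_EMIT(type) ((void)(type *)0)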
diff --combined tools/lib/bpf/libbpf.c
@@@ -390,6 -390,12 +390,12 @@@ struct extern_desc 
                } kcfg;
                struct {
                        unsigned long long addr;
+                       /* target btf_id of the corresponding kernel var. */
+                       int vmlinux_btf_id;
+                       /* local btf_id of the ksym extern's type. */
+                       __u32 type_id;
                } ksym;
        };
  };
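To show what the new ksym fields are for, here is a hedged BPF-side sketch of a typed ksym extern, loosely based on the per-cpu variable support in this series; the variable name, section, and use of bpf_per_cpu_ptr() are assumptions for illustration:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* typed ksym: libbpf resolves it to a vmlinux BTF ID (ksym.vmlinux_btf_id)
 * instead of a kallsyms address, so the verifier can type-check accesses
 */
extern const struct rq runqueues __ksym;

SEC("raw_tp/sys_enter")
int read_cpu0_rq(const void *ctx)
{
	struct rq *rq;

	rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0);
	if (!rq)
		return 0;

	bpf_printk("cpu0 rq cpu=%d", rq->cpu);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";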
@@@ -2522,12 -2528,23 +2528,23 @@@ static int bpf_object__load_vmlinux_btf
  {
        bool need_vmlinux_btf = false;
        struct bpf_program *prog;
-       int err;
+       int i, err;
  
        /* CO-RE relocations need kernel BTF */
        if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
                need_vmlinux_btf = true;
  
+       /* Support for typed ksyms needs kernel BTF */
+       for (i = 0; i < obj->nr_extern; i++) {
+               const struct extern_desc *ext;
+
+               ext = &obj->externs[i];
+               if (ext->type == EXT_KSYM && ext->ksym.type_id) {
+                       need_vmlinux_btf = true;
+                       break;
+               }
+       }
+
        bpf_object__for_each_program(prog, obj) {
                if (!prog->load)
                        continue;
@@@ -3156,16 -3173,10 +3173,10 @@@ static int bpf_object__collect_externs(
                                return -ENOTSUP;
                        }
                } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
-                       const struct btf_type *vt;
                        ksym_sec = sec;
                        ext->type = EXT_KSYM;
-                       vt = skip_mods_and_typedefs(obj->btf, t->type, NULL);
-                       if (!btf_is_void(vt)) {
-                               pr_warn("extern (ksym) '%s' is not typeless (void)\n", ext_name);
-                               return -ENOTSUP;
-                       }
+                       skip_mods_and_typedefs(obj->btf, t->type,
+                                              &ext->ksym.type_id);
                } else {
                        pr_warn("unrecognized extern section '%s'\n", sec_name);
                        return -ENOTSUP;
@@@ -4192,6 -4203,36 +4203,36 @@@ static int bpf_object__create_map(struc
        return 0;
  }
  
+ static int init_map_slots(struct bpf_map *map)
+ {
+       const struct bpf_map *targ_map;
+       unsigned int i;
+       int fd, err;
+
+       for (i = 0; i < map->init_slots_sz; i++) {
+               if (!map->init_slots[i])
+                       continue;
+
+               targ_map = map->init_slots[i];
+               fd = bpf_map__fd(targ_map);
+               err = bpf_map_update_elem(map->fd, &i, &fd, 0);
+               if (err) {
+                       err = -errno;
+                       pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
+                               map->name, i, targ_map->name,
+                               fd, err);
+                       return err;
+               }
+               pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
+                        map->name, i, targ_map->name, fd);
+       }
+
+       zfree(&map->init_slots);
+       map->init_slots_sz = 0;
+       return 0;
+ }
+
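The slots initialized here come from BTF-defined map-in-map declarations on the BPF side; a hedged sketch of such a declaration (all names and sizes invented for illustration) whose .values initializer ends up in map->init_slots:

struct inner_map {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, __u64);
} inner_a SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
	__uint(max_entries, 4);
	__uint(key_size, sizeof(int));
	__array(values, struct inner_map);
} outer_arr SEC(".maps") = {
	/* slot 0 pre-set; init_map_slots() writes inner_a's fd there */
	.values = { [0] = &inner_a },
};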
  static int
  bpf_object__create_maps(struct bpf_object *obj)
  {
                if (map->fd >= 0) {
                        pr_debug("map '%s': skipping creation (preset fd=%d)\n",
                                 map->name, map->fd);
-                       continue;
-               }
-               err = bpf_object__create_map(obj, map);
-               if (err)
-                       goto err_out;
-               pr_debug("map '%s': created successfully, fd=%d\n", map->name,
-                        map->fd);
-               if (bpf_map__is_internal(map)) {
-                       err = bpf_object__populate_internal_map(obj, map);
-                       if (err < 0) {
-                               zclose(map->fd);
+               } else {
+                       err = bpf_object__create_map(obj, map);
+                       if (err)
                                goto err_out;
-                       }
-               }
  
-               if (map->init_slots_sz) {
-                       for (j = 0; j < map->init_slots_sz; j++) {
-                               const struct bpf_map *targ_map;
-                               int fd;
+                       pr_debug("map '%s': created successfully, fd=%d\n",
+                                map->name, map->fd);
  
-                               if (!map->init_slots[j])
-                                       continue;
+                       if (bpf_map__is_internal(map)) {
+                               err = bpf_object__populate_internal_map(obj, map);
+                               if (err < 0) {
+                                       zclose(map->fd);
+                                       goto err_out;
+                               }
+                       }
  
-                               targ_map = map->init_slots[j];
-                               fd = bpf_map__fd(targ_map);
-                               err = bpf_map_update_elem(map->fd, &j, &fd, 0);
-                               if (err) {
-                                       err = -errno;
-                                       pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
-                                               map->name, j, targ_map->name,
-                                               fd, err);
+                       if (map->init_slots_sz) {
+                               err = init_map_slots(map);
+                               if (err < 0) {
+                                       zclose(map->fd);
                                        goto err_out;
                                }
-                               pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
-                                        map->name, j, targ_map->name, fd);
                        }
-                       zfree(&map->init_slots);
-                       map->init_slots_sz = 0;
                }
  
                if (map->pin_path && !map->pinned) {
@@@ -5017,16 -5040,19 +5040,19 @@@ static int bpf_core_spec_match(struct b
  static int bpf_core_calc_field_relo(const struct bpf_program *prog,
                                    const struct bpf_core_relo *relo,
                                    const struct bpf_core_spec *spec,
-                                   __u32 *val, bool *validate)
+                                   __u32 *val, __u32 *field_sz, __u32 *type_id,
+                                   bool *validate)
  {
        const struct bpf_core_accessor *acc;
        const struct btf_type *t;
-       __u32 byte_off, byte_sz, bit_off, bit_sz;
+       __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
        const struct btf_member *m;
        const struct btf_type *mt;
        bool bitfield;
        __s64 sz;
  
+       *field_sz = 0;
        if (relo->kind == BPF_FIELD_EXISTS) {
                *val = spec ? 1 : 0;
                return 0;
        if (!acc->name) {
                if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
                        *val = spec->bit_offset / 8;
+                       /* remember field size for load/store mem size adjustment */
+                       sz = btf__resolve_size(spec->btf, acc->type_id);
+                       if (sz < 0)
+                               return -EINVAL;
+                       *field_sz = sz;
+                       *type_id = acc->type_id;
                } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
                        sz = btf__resolve_size(spec->btf, acc->type_id);
                        if (sz < 0)
        }
  
        m = btf_members(t) + acc->idx;
-       mt = skip_mods_and_typedefs(spec->btf, m->type, NULL);
+       mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
        bit_off = spec->bit_offset;
        bit_sz = btf_member_bitfield_size(t, acc->idx);
  
                        byte_off = bit_off / 8 / byte_sz * byte_sz;
                }
        } else {
-               sz = btf__resolve_size(spec->btf, m->type);
+               sz = btf__resolve_size(spec->btf, field_type_id);
                if (sz < 0)
                        return -EINVAL;
                byte_sz = sz;
        switch (relo->kind) {
        case BPF_FIELD_BYTE_OFFSET:
                *val = byte_off;
+               if (!bitfield) {
+                       *field_sz = byte_sz;
+                       *type_id = field_type_id;
+               }
                break;
        case BPF_FIELD_BYTE_SIZE:
                *val = byte_sz;
@@@ -5196,6 -5232,19 +5232,19 @@@ struct bpf_core_relo_re
        bool poison;
        /* some relocations can't be validated against orig_val */
        bool validate;
+       /* for field byte offset relocations, or for direct memory load/store
+        * instructions of the forms:
+        *     *(T *)(rX + <off>) = rY
+        *     rX = *(T *)(rY + <off>),
+        * we remember the original and resolved field sizes to adjust the
+        * memory access size; this is necessary for 32-bit host kernel
+        * architectures, but it also makes it possible to automatically
+        * relocate fields that were resized from, e.g., u32 to u64.
+        */
+       bool fail_memsz_adjust;
+       __u32 orig_sz;
+       __u32 orig_type_id;
+       __u32 new_sz;
+       __u32 new_type_id;
  };
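To make the comment above concrete, a hedged tracing sketch in which a direct, CO-RE-relocated load could have its memory size adjusted; the program, field, and types are illustrative assumptions:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

SEC("tp_btf/sched_switch")
int BPF_PROG(on_switch, bool preempt, struct task_struct *prev,
	     struct task_struct *next)
{
	/* Direct load of an unsigned field. If the field's size differs
	 * between the local vmlinux.h and the running kernel's BTF, libbpf
	 * rewrites the load's BPF_SIZE; it only does so for pointers and
	 * unsigned integers, where zero-extension keeps the value correct,
	 * and poisons the instruction otherwise.
	 */
	__u64 flags = prev->flags;

	bpf_printk("prev flags=%llx", flags);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";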
  
  /* Calculate original and target relocation values, given local and target
@@@ -5217,10 -5266,56 +5266,56 @@@ static int bpf_core_calc_relo(const str
        res->new_val = 0;
        res->poison = false;
        res->validate = true;
+       res->fail_memsz_adjust = false;
+       res->orig_sz = res->new_sz = 0;
+       res->orig_type_id = res->new_type_id = 0;
  
        if (core_relo_is_field_based(relo->kind)) {
-               err = bpf_core_calc_field_relo(prog, relo, local_spec, &res->orig_val, &res->validate);
-               err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec, &res->new_val, NULL);
+               err = bpf_core_calc_field_relo(prog, relo, local_spec,
+                                              &res->orig_val, &res->orig_sz,
+                                              &res->orig_type_id, &res->validate);
+               err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec,
+                                                     &res->new_val, &res->new_sz,
+                                                     &res->new_type_id, NULL);
+               if (err)
+                       goto done;
+               /* Validate if it's safe to adjust load/store memory size.
+                * Adjustments are performed only if original and new memory
+                * sizes differ.
+                */
+               res->fail_memsz_adjust = false;
+               if (res->orig_sz != res->new_sz) {
+                       const struct btf_type *orig_t, *new_t;
+                       orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
+                       new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
+                       /* There are two use cases in which it's safe to
+                        * adjust load/store's mem size:
+                        *   - reading a 32-bit kernel pointer, while on the
+                        *   BPF side pointers are always 64-bit; in this case
+                        *   it's safe to "downsize" the instruction size,
+                        *   because the pointer is treated as an unsigned
+                        *   integer with zero-extended upper 32 bits;
+                        *   - reading unsigned integers, again because
+                        *   zero-extension preserves the value correctly.
+                        *
+                        * In all other cases the load/store cannot be adjusted
+                        * safely and the read value would be wrong, so we
+                        * poison the relocated instruction.
+                        */
+                       if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
+                               goto done;
+                       if (btf_is_int(orig_t) && btf_is_int(new_t) &&
+                           btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
+                           btf_int_encoding(new_t) != BTF_INT_SIGNED)
+                               goto done;
+                       /* mark as invalid mem size adjustment, but this will
+                        * only be checked for LDX/STX/ST insns
+                        */
+                       res->fail_memsz_adjust = true;
+               }
        } else if (core_relo_is_type_based(relo->kind)) {
                err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
                err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
                err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
        }
  
+ done:
        if (err == -EUCLEAN) {
                /* EUCLEAN is used to signal instruction poisoning request */
                res->poison = true;
@@@ -5268,6 -5364,28 +5364,28 @@@ static bool is_ldimm64(struct bpf_insn 
        return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
  }
  
+ static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
+ {
+       switch (BPF_SIZE(insn->code)) {
+       case BPF_DW: return 8;
+       case BPF_W: return 4;
+       case BPF_H: return 2;
+       case BPF_B: return 1;
+       default: return -1;
+       }
+ }
+
+ static int insn_bytes_to_bpf_size(__u32 sz)
+ {
+       switch (sz) {
+       case 8: return BPF_DW;
+       case 4: return BPF_W;
+       case 2: return BPF_H;
+       case 1: return BPF_B;
+       default: return -1;
+       }
+ }
+
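For intuition, a sketch of the instruction-level rewrite that these two helpers support (registers, offsets, and sizes are made up for illustration):

/* local BTF:   field is a u32 at offset 16, so clang emits
 *     r1 = *(u32 *)(r2 + 16);      // BPF_LDX | BPF_MEM | BPF_W, off 16
 * kernel BTF:  same field is a u64 at offset 24, so the insn is patched to
 *     r1 = *(u64 *)(r2 + 24);      // BPF_LDX | BPF_MEM | BPF_DW, off 24
 * (offset comes from the byte-offset relocation, size from the code below)
 */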
  /*
   * Patch relocatable BPF instruction.
   *
   * spec, and is checked before patching instruction. If actual insn->imm value
   * is wrong, bail out with error.
   *
-  * Currently three kinds of BPF instructions are supported:
+  * Currently supported classes of BPF instruction are:
   * 1. rX = <imm> (assignment with immediate operand);
   * 2. rX += <imm> (arithmetic operations with immediate operand);
-  * 3. rX = <imm64> (load with 64-bit immediate value).
+  * 3. rX = <imm64> (load with 64-bit immediate value);
+  * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
+  * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
+  * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
   */
  static int bpf_core_patch_insn(struct bpf_program *prog,
                               const struct bpf_core_relo *relo,
        class = BPF_CLASS(insn->code);
  
        if (res->poison) {
+ poison:
                /* poison second part of ldimm64 to avoid confusing error from
                 * verifier about "unknown opcode 00"
                 */
                                prog->name, relo_idx, insn_idx, new_val);
                        return -ERANGE;
                }
+               if (res->fail_memsz_adjust) {
+                       pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
+                               "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
+                               prog->name, relo_idx, insn_idx);
+                       goto poison;
+               }
                orig_val = insn->off;
                insn->off = new_val;
                pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
                         prog->name, relo_idx, insn_idx, orig_val, new_val);
+               if (res->new_sz != res->orig_sz) {
+                       int insn_bytes_sz, insn_bpf_sz;
+                       insn_bytes_sz = insn_bpf_size_to_bytes(insn);
+                       if (insn_bytes_sz != res->orig_sz) {
+                               pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
+                                       prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
+                               return -EINVAL;
+                       }
+                       insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
+                       if (insn_bpf_sz < 0) {
+                               pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
+                                       prog->name, relo_idx, insn_idx, res->new_sz);
+                               return -EINVAL;
+                       }
+                       insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
+                       pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
+                                prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
+               }
                break;
        case BPF_LD: {
                __u64 imm;
@@@ -5691,7 -5842,7 +5842,7 @@@ bpf_object__relocate_core(struct bpf_ob
                return 0;
  
        if (targ_btf_path)
-               targ_btf = btf__parse_elf(targ_btf_path, NULL);
+               targ_btf = btf__parse(targ_btf_path, NULL);
        else
                targ_btf = obj->btf_vmlinux;
        if (IS_ERR_OR_NULL(targ_btf)) {
                                err = -EINVAL;
                                goto out;
                        }
+                       /* no need to apply CO-RE relocation if the program is
+                        * not going to be loaded
+                        */
+                       if (!prog->load)
+                               continue;
  
                        err = bpf_core_apply_relo(prog, rec, i, obj->btf,
                                                  targ_btf, cand_cache);
@@@ -5800,8 -5956,13 +5956,13 @@@ bpf_object__relocate_data(struct bpf_ob
                                insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
                                insn[1].imm = ext->kcfg.data_off;
                        } else /* EXT_KSYM */ {
-                               insn[0].imm = (__u32)ext->ksym.addr;
-                               insn[1].imm = ext->ksym.addr >> 32;
+                               if (ext->ksym.type_id) { /* typed ksyms */
+                                       insn[0].src_reg = BPF_PSEUDO_BTF_ID;
+                                       insn[0].imm = ext->ksym.vmlinux_btf_id;
+                               } else { /* typeless ksyms */
+                                       insn[0].imm = (__u32)ext->ksym.addr;
+                                       insn[1].imm = ext->ksym.addr >> 32;
+                               }
                        }
                        relo->processed = true;
                        break;
        return err;
  }
  
+ static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
+ {
+       struct extern_desc *ext;
+       int i, id;
+       for (i = 0; i < obj->nr_extern; i++) {
+               const struct btf_type *targ_var, *targ_type;
+               __u32 targ_type_id, local_type_id;
+               const char *targ_var_name;
+               int ret;
+               ext = &obj->externs[i];
+               if (ext->type != EXT_KSYM || !ext->ksym.type_id)
+                       continue;
+               id = btf__find_by_name_kind(obj->btf_vmlinux, ext->name,
+                                           BTF_KIND_VAR);
+               if (id <= 0) {
+                       pr_warn("extern (ksym) '%s': failed to find BTF ID in vmlinux BTF.\n",
+                               ext->name);
+                       return -ESRCH;
+               }
+               /* find local type_id */
+               local_type_id = ext->ksym.type_id;
+               /* find target type_id */
+               targ_var = btf__type_by_id(obj->btf_vmlinux, id);
+               targ_var_name = btf__name_by_offset(obj->btf_vmlinux,
+                                                   targ_var->name_off);
+               targ_type = skip_mods_and_typedefs(obj->btf_vmlinux,
+                                                  targ_var->type,
+                                                  &targ_type_id);
+               ret = bpf_core_types_are_compat(obj->btf, local_type_id,
+                                               obj->btf_vmlinux, targ_type_id);
+               if (ret <= 0) {
+                       const struct btf_type *local_type;
+                       const char *targ_name, *local_name;
+                       local_type = btf__type_by_id(obj->btf, local_type_id);
+                       local_name = btf__name_by_offset(obj->btf,
+                                                        local_type->name_off);
+                       targ_name = btf__name_by_offset(obj->btf_vmlinux,
+                                                       targ_type->name_off);
+                       pr_warn("extern (ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
+                               ext->name, local_type_id,
+                               btf_kind_str(local_type), local_name, targ_type_id,
+                               btf_kind_str(targ_type), targ_name);
+                       return -EINVAL;
+               }
+               ext->is_set = true;
+               ext->ksym.vmlinux_btf_id = id;
+               pr_debug("extern (ksym) '%s': resolved to [%d] %s %s\n",
+                        ext->name, id, btf_kind_str(targ_var), targ_var_name);
+       }
+       return 0;
+ }
+
  static int bpf_object__resolve_externs(struct bpf_object *obj,
                                       const char *extra_kconfig)
  {
        bool need_config = false, need_kallsyms = false;
+       bool need_vmlinux_btf = false;
        struct extern_desc *ext;
        void *kcfg_data = NULL;
        int err, i;
                           strncmp(ext->name, "CONFIG_", 7) == 0) {
                        need_config = true;
                } else if (ext->type == EXT_KSYM) {
-                       need_kallsyms = true;
+                       if (ext->ksym.type_id)
+                               need_vmlinux_btf = true;
+                       else
+                               need_kallsyms = true;
                } else {
                        pr_warn("unrecognized extern '%s'\n", ext->name);
                        return -EINVAL;
                if (err)
                        return -EINVAL;
        }
+       if (need_vmlinux_btf) {
+               err = bpf_object__resolve_ksyms_btf_id(obj);
+               if (err)
+                       return -EINVAL;
+       }
        for (i = 0; i < obj->nr_extern; i++) {
                ext = &obj->externs[i];
  
@@@ -7028,10 -7259,10 +7259,10 @@@ int bpf_object__load_xattr(struct bpf_o
        }
  
        err = bpf_object__probe_loading(obj);
+       err = err ? : bpf_object__load_vmlinux_btf(obj);
        err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
        err = err ? : bpf_object__sanitize_and_load_btf(obj);
        err = err ? : bpf_object__sanitize_maps(obj);
-       err = err ? : bpf_object__load_vmlinux_btf(obj);
        err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
        err = err ? : bpf_object__create_maps(obj);
        err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
@@@ -8071,7 -8302,7 +8302,7 @@@ static const struct bpf_sec_def section
                                                BPF_XDP_DEVMAP),
        BPF_EAPROG_SEC("xdp_cpumap/",           BPF_PROG_TYPE_XDP,
                                                BPF_XDP_CPUMAP),
 -      BPF_EAPROG_SEC("xdp",                   BPF_PROG_TYPE_XDP,
 +      BPF_APROG_SEC("xdp",                    BPF_PROG_TYPE_XDP,
                                                BPF_XDP),
        BPF_PROG_SEC("perf_event",              BPF_PROG_TYPE_PERF_EVENT),
        BPF_PROG_SEC("lwt_in",                  BPF_PROG_TYPE_LWT_IN),
@@@ -10353,9 -10584,8 +10584,8 @@@ int bpf_program__set_attach_target(stru
                btf_id = libbpf_find_prog_btf_id(attach_func_name,
                                                 attach_prog_fd);
        else
-               btf_id = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
-                                              attach_func_name,
-                                              prog->expected_attach_type);
+               btf_id = libbpf_find_vmlinux_btf_id(attach_func_name,
+                                                   prog->expected_attach_type);
  
        if (btf_id < 0)
                return btf_id;