Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
author     Jakub Kicinski <kuba@kernel.org>
           Sat, 14 Nov 2020 17:13:40 +0000 (09:13 -0800)
committer  Jakub Kicinski <kuba@kernel.org>
           Sat, 14 Nov 2020 17:13:41 +0000 (09:13 -0800)
Daniel Borkmann says:

====================
pull-request: bpf-next 2020-11-14

1) Add BTF generation for kernel modules and extend BTF infra in the kernel,
   e.g. support for split BTF loading and validation, from Andrii Nakryiko.

2) Support for pointers beyond pkt_end to recognize LLVM-generated patterns
   on inlined branch conditions, from Alexei Starovoitov.

3) Implement bpf_local_storage for task_struct for BPF LSM programs, from
   KP Singh (see the first sketch below this list).

4) Enable FENTRY/FEXIT/RAW_TP tracing programs to use the bpf_sk_storage
   infra, from Martin KaFai Lau.

5) Add XDP bulk APIs that introduce a defer/flush mechanism to optimize the
   XDP_REDIRECT path, from Lorenzo Bianconi (see the second sketch below
   this list).

6) Fix a potential (although rather theoretical) deadlock of hashtab in NMI
   context, from Song Liu.

7) Fixes for cross and out-of-tree builds of bpftool and runqslower, allowing
   builds for different target archs from the same source tree, from
   Jean-Philippe Brucker.

8) Fix error path in htab_map_alloc() triggered from syzbot, from Eric Dumazet.

9) Move functionality from test_tcpbpf_user into the test_progs framework so it
   can run in BPF CI, from Alexander Duyck.

10) Lift hashtab key_size limit to be larger than MAX_BPF_STACK, from Florian Lehner.
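
As an illustration for 3), below is a minimal sketch of a BPF LSM program
using the new task-local storage from the BPF side. The map name, value
layout and the choice of the task_alloc hook are made up for this example
(it is not taken from the series' selftests), and it assumes a
libbpf/vmlinux.h toolchain that already carries BPF_MAP_TYPE_TASK_STORAGE,
bpf_task_storage_get() and BPF_LOCAL_STORAGE_GET_F_CREATE from this series:

  // SPDX-License-Identifier: GPL-2.0
  /* Hypothetical example: count clone events per task in a
   * BPF_MAP_TYPE_TASK_STORAGE map from the task_alloc LSM hook.
   */
  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>

  char _license[] SEC("license") = "GPL";

  struct {
          __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
          __uint(map_flags, BPF_F_NO_PREALLOC);
          __type(key, int);
          __type(value, __u64);
  } task_clone_cnt SEC(".maps");

  SEC("lsm/task_alloc")
  int BPF_PROG(count_task_alloc, struct task_struct *task,
               unsigned long clone_flags)
  {
          __u64 *cnt;

          /* Create-on-miss lookup keyed by the newly allocated task. */
          cnt = bpf_task_storage_get(&task_clone_cnt, task, 0,
                                     BPF_LOCAL_STORAGE_GET_F_CREATE);
          if (cnt)
                  __sync_fetch_and_add(cnt, 1);

          return 0;
  }

As far as I read the series, user space can then look elements up with the
regular bpf map syscalls, passing a pidfd as the map key.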
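
For 5), a rough sketch of the defer/flush pattern a driver's TX-completion
path could follow with the new bulk return API. The my_* structures and
function are invented for illustration; only xdp_frame_bulk_init(),
xdp_return_frame_bulk() and xdp_flush_frame_bulk() come from this series:

  /* Hypothetical driver TX-completion cleanup using bulk frame return. */
  #include <linux/rcupdate.h>
  #include <net/xdp.h>

  struct my_tx_desc {
          struct xdp_frame *xdpf;
  };

  struct my_tx_ring {
          struct my_tx_desc *desc;
          unsigned int next_to_clean;
          unsigned int count;
  };

  static void my_clean_tx_ring(struct my_tx_ring *ring, unsigned int done)
  {
          struct xdp_frame_bulk bq;
          unsigned int i;

          xdp_frame_bulk_init(&bq);
          rcu_read_lock();        /* needed for xdp_return_frame_bulk() */

          for (i = 0; i < done; i++) {
                  struct my_tx_desc *d = &ring->desc[ring->next_to_clean];

                  /* Defer: frames are queued and released in batches,
                   * amortizing the per-frame page_pool/ptr_ring cost.
                   */
                  xdp_return_frame_bulk(d->xdpf, &bq);
                  ring->next_to_clean = (ring->next_to_clean + 1) % ring->count;
          }

          /* Flush whatever is still queued before leaving the RCU section. */
          xdp_flush_frame_bulk(&bq);
          rcu_read_unlock();
  }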

Note that for the fix from Song we have seen a sparse report on context
imbalance which requires changes in sparse itself for proper annotation
detection; this is currently being discussed on the linux-sparse list
among developers [0]. Once there is more clarification/guidance after
their fix, Song will follow up.

  [0] https://lore.kernel.org/linux-sparse/CAHk-=wh4bx8A8dHnX612MsDO13st6uzAz1mJ1PaHHVevJx_ZCw@mail.gmail.com/T/
      https://lore.kernel.org/linux-sparse/20201109221345.uklbp3lzgq6g42zb@ltop.local/T/

* git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (66 commits)
  net: mlx5: Add xdp tx return bulking support
  net: mvpp2: Add xdp tx return bulking support
  net: mvneta: Add xdp tx return bulking support
  net: page_pool: Add bulk support for ptr_ring
  net: xdp: Introduce bulking for xdp tx return path
  bpf: Expose bpf_d_path helper to sleepable LSM hooks
  bpf: Augment the set of sleepable LSM hooks
  bpf: selftest: Use bpf_sk_storage in FENTRY/FEXIT/RAW_TP
  bpf: Allow using bpf_sk_storage in FENTRY/FEXIT/RAW_TP
  bpf: Rename some functions in bpf_sk_storage
  bpf: Folding omem_charge() into sk_storage_charge()
  selftests/bpf: Add asm tests for pkt vs pkt_end comparison.
  selftests/bpf: Add skb_pkt_end test
  bpf: Support for pointers beyond pkt_end.
  tools/bpf: Always run the *-clean recipes
  tools/bpf: Add bootstrap/ to .gitignore
  bpf: Fix NULL dereference in bpf_task_storage
  tools/bpftool: Fix build slowdown
  tools/runqslower: Build bpftool using HOSTCC
  tools/runqslower: Enable out-of-tree build
  ...
====================

Link: https://lore.kernel.org/r/20201114020819.29584-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
include/linux/module.h
kernel/bpf/Makefile
kernel/bpf/bpf_lsm.c
kernel/bpf/hashtab.c
lib/Kconfig.debug

diff --combined include/linux/module.h
@@@ -278,7 -278,7 +278,7 @@@ extern typeof(name) __mod_##type##__##n
                .version        = _version,                             \
        };                                                              \
        static const struct module_version_attribute                    \
 -      __used __attribute__ ((__section__ ("__modver")))               \
 +      __used __section("__modver")                                    \
        * __moduleparam_const __modver_attr = &___modver_attr
  #endif
  
@@@ -475,6 -475,10 +475,10 @@@ struct module 
        unsigned int num_bpf_raw_events;
        struct bpf_raw_event_map *bpf_raw_events;
  #endif
+ #ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+       unsigned int btf_data_size;
+       void *btf_data;
+ #endif
  #ifdef CONFIG_JUMP_LABEL
        struct jump_entry *jump_entries;
        unsigned int num_jump_entries;
@@@ -740,7 -744,7 +744,7 @@@ static inline bool within_module(unsign
  }
  
  /* Get/put a kernel symbol (calls should be symmetric) */
 -#define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); })
 +#define symbol_get(x) ({ extern typeof(x) x __attribute__((weak,visibility("hidden"))); &(x); })
  #define symbol_put(x) do { } while (0)
  #define symbol_put_addr(x) do { } while (0)
  
diff --combined kernel/bpf/Makefile
@@@ -1,15 -1,12 +1,16 @@@
  # SPDX-License-Identifier: GPL-2.0
  obj-y := core.o
 -CFLAGS_core.o += $(call cc-disable-warning, override-init)
 +ifneq ($(CONFIG_BPF_JIT_ALWAYS_ON),y)
 +# ___bpf_prog_run() needs GCSE disabled on x86; see 3193c0836f203 for details
 +cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
 +endif
 +CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
  
  obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
  obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
  obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
  obj-${CONFIG_BPF_LSM}   += bpf_inode_storage.o
+ obj-${CONFIG_BPF_LSM}   += bpf_task_storage.o
  obj-$(CONFIG_BPF_SYSCALL) += disasm.o
  obj-$(CONFIG_BPF_JIT) += trampoline.o
  obj-$(CONFIG_BPF_SYSCALL) += btf.o
diff --combined kernel/bpf/bpf_lsm.c
@@@ -27,11 -27,7 +27,11 @@@ noinline RET bpf_lsm_##NAME(__VA_ARGS__
  #include <linux/lsm_hook_defs.h>
  #undef LSM_HOOK
  
 -#define BPF_LSM_SYM_PREFX  "bpf_lsm_"
 +#define LSM_HOOK(RET, DEFAULT, NAME, ...) BTF_ID(func, bpf_lsm_##NAME)
 +BTF_SET_START(bpf_lsm_hooks)
 +#include <linux/lsm_hook_defs.h>
 +#undef LSM_HOOK
 +BTF_SET_END(bpf_lsm_hooks)
  
  int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
                        const struct bpf_prog *prog)
@@@ -42,7 -38,8 +42,7 @@@
                return -EINVAL;
        }
  
 -      if (strncmp(BPF_LSM_SYM_PREFX, prog->aux->attach_func_name,
 -                  sizeof(BPF_LSM_SYM_PREFX) - 1)) {
 +      if (!btf_id_set_contains(&bpf_lsm_hooks, prog->aux->attach_btf_id)) {
                bpf_log(vlog, "attach_btf_id %u points to wrong type name %s\n",
                        prog->aux->attach_btf_id, prog->aux->attach_func_name);
                return -EINVAL;
@@@ -63,11 -60,99 +63,99 @@@ bpf_lsm_func_proto(enum bpf_func_id fun
                return &bpf_sk_storage_get_proto;
        case BPF_FUNC_sk_storage_delete:
                return &bpf_sk_storage_delete_proto;
+       case BPF_FUNC_spin_lock:
+               return &bpf_spin_lock_proto;
+       case BPF_FUNC_spin_unlock:
+               return &bpf_spin_unlock_proto;
+       case BPF_FUNC_task_storage_get:
+               return &bpf_task_storage_get_proto;
+       case BPF_FUNC_task_storage_delete:
+               return &bpf_task_storage_delete_proto;
        default:
                return tracing_prog_func_proto(func_id, prog);
        }
  }
  
+ /* The set of hooks which are called without pagefaults disabled and are allowed
+  * to "sleep" and thus can be used for sleepable BPF programs.
+  */
+ BTF_SET_START(sleepable_lsm_hooks)
+ BTF_ID(func, bpf_lsm_bpf)
+ BTF_ID(func, bpf_lsm_bpf_map)
+ BTF_ID(func, bpf_lsm_bpf_map_alloc_security)
+ BTF_ID(func, bpf_lsm_bpf_map_free_security)
+ BTF_ID(func, bpf_lsm_bpf_prog)
+ BTF_ID(func, bpf_lsm_bprm_check_security)
+ BTF_ID(func, bpf_lsm_bprm_committed_creds)
+ BTF_ID(func, bpf_lsm_bprm_committing_creds)
+ BTF_ID(func, bpf_lsm_bprm_creds_for_exec)
+ BTF_ID(func, bpf_lsm_bprm_creds_from_file)
+ BTF_ID(func, bpf_lsm_capget)
+ BTF_ID(func, bpf_lsm_capset)
+ BTF_ID(func, bpf_lsm_cred_prepare)
+ BTF_ID(func, bpf_lsm_file_ioctl)
+ BTF_ID(func, bpf_lsm_file_lock)
+ BTF_ID(func, bpf_lsm_file_open)
+ BTF_ID(func, bpf_lsm_file_receive)
+ BTF_ID(func, bpf_lsm_inet_conn_established)
+ BTF_ID(func, bpf_lsm_inode_create)
+ BTF_ID(func, bpf_lsm_inode_free_security)
+ BTF_ID(func, bpf_lsm_inode_getattr)
+ BTF_ID(func, bpf_lsm_inode_getxattr)
+ BTF_ID(func, bpf_lsm_inode_mknod)
+ BTF_ID(func, bpf_lsm_inode_need_killpriv)
+ BTF_ID(func, bpf_lsm_inode_post_setxattr)
+ BTF_ID(func, bpf_lsm_inode_readlink)
+ BTF_ID(func, bpf_lsm_inode_rename)
+ BTF_ID(func, bpf_lsm_inode_rmdir)
+ BTF_ID(func, bpf_lsm_inode_setattr)
+ BTF_ID(func, bpf_lsm_inode_setxattr)
+ BTF_ID(func, bpf_lsm_inode_symlink)
+ BTF_ID(func, bpf_lsm_inode_unlink)
+ BTF_ID(func, bpf_lsm_kernel_module_request)
+ BTF_ID(func, bpf_lsm_kernfs_init_security)
+ BTF_ID(func, bpf_lsm_key_free)
+ BTF_ID(func, bpf_lsm_mmap_file)
+ BTF_ID(func, bpf_lsm_netlink_send)
+ BTF_ID(func, bpf_lsm_path_notify)
+ BTF_ID(func, bpf_lsm_release_secctx)
+ BTF_ID(func, bpf_lsm_sb_alloc_security)
+ BTF_ID(func, bpf_lsm_sb_eat_lsm_opts)
+ BTF_ID(func, bpf_lsm_sb_kern_mount)
+ BTF_ID(func, bpf_lsm_sb_mount)
+ BTF_ID(func, bpf_lsm_sb_remount)
+ BTF_ID(func, bpf_lsm_sb_set_mnt_opts)
+ BTF_ID(func, bpf_lsm_sb_show_options)
+ BTF_ID(func, bpf_lsm_sb_statfs)
+ BTF_ID(func, bpf_lsm_sb_umount)
+ BTF_ID(func, bpf_lsm_settime)
+ BTF_ID(func, bpf_lsm_socket_accept)
+ BTF_ID(func, bpf_lsm_socket_bind)
+ BTF_ID(func, bpf_lsm_socket_connect)
+ BTF_ID(func, bpf_lsm_socket_create)
+ BTF_ID(func, bpf_lsm_socket_getpeername)
+ BTF_ID(func, bpf_lsm_socket_getpeersec_dgram)
+ BTF_ID(func, bpf_lsm_socket_getsockname)
+ BTF_ID(func, bpf_lsm_socket_getsockopt)
+ BTF_ID(func, bpf_lsm_socket_listen)
+ BTF_ID(func, bpf_lsm_socket_post_create)
+ BTF_ID(func, bpf_lsm_socket_recvmsg)
+ BTF_ID(func, bpf_lsm_socket_sendmsg)
+ BTF_ID(func, bpf_lsm_socket_shutdown)
+ BTF_ID(func, bpf_lsm_socket_socketpair)
+ BTF_ID(func, bpf_lsm_syslog)
+ BTF_ID(func, bpf_lsm_task_alloc)
+ BTF_ID(func, bpf_lsm_task_getsecid)
+ BTF_ID(func, bpf_lsm_task_prctl)
+ BTF_ID(func, bpf_lsm_task_setscheduler)
+ BTF_ID(func, bpf_lsm_task_to_inode)
+ BTF_SET_END(sleepable_lsm_hooks)
+ bool bpf_lsm_is_sleepable_hook(u32 btf_id)
+ {
+       return btf_id_set_contains(&sleepable_lsm_hooks, btf_id);
+ }
  const struct bpf_prog_ops lsm_prog_ops = {
  };
  
diff --combined kernel/bpf/hashtab.c
@@@ -86,6 -86,9 +86,9 @@@ struct bucket 
        };
  };
  
+ #define HASHTAB_MAP_LOCK_COUNT 8
+ #define HASHTAB_MAP_LOCK_MASK (HASHTAB_MAP_LOCK_COUNT - 1)
  struct bpf_htab {
        struct bpf_map map;
        struct bucket *buckets;
        u32 n_buckets;  /* number of hash buckets */
        u32 elem_size;  /* size of each element in bytes */
        u32 hashrnd;
+       struct lock_class_key lockdep_key;
+       int __percpu *map_locked[HASHTAB_MAP_LOCK_COUNT];
  };
  
  /* each htab element is struct htab_elem + key + value */
@@@ -138,33 -143,53 +143,53 @@@ static void htab_init_buckets(struct bp
  
        for (i = 0; i < htab->n_buckets; i++) {
                INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
-               if (htab_use_raw_lock(htab))
+               if (htab_use_raw_lock(htab)) {
                        raw_spin_lock_init(&htab->buckets[i].raw_lock);
-               else
+                       lockdep_set_class(&htab->buckets[i].raw_lock,
+                                         &htab->lockdep_key);
+               } else {
                        spin_lock_init(&htab->buckets[i].lock);
+                       lockdep_set_class(&htab->buckets[i].lock,
+                                         &htab->lockdep_key);
+               }
        }
  }
  
- static inline unsigned long htab_lock_bucket(const struct bpf_htab *htab,
-                                            struct bucket *b)
+ static inline int htab_lock_bucket(const struct bpf_htab *htab,
+                                  struct bucket *b, u32 hash,
+                                  unsigned long *pflags)
  {
        unsigned long flags;
  
+       hash = hash & HASHTAB_MAP_LOCK_MASK;
+       migrate_disable();
+       if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
+               __this_cpu_dec(*(htab->map_locked[hash]));
+               migrate_enable();
+               return -EBUSY;
+       }
        if (htab_use_raw_lock(htab))
                raw_spin_lock_irqsave(&b->raw_lock, flags);
        else
                spin_lock_irqsave(&b->lock, flags);
-       return flags;
+       *pflags = flags;
+       return 0;
  }
  
  static inline void htab_unlock_bucket(const struct bpf_htab *htab,
-                                     struct bucket *b,
+                                     struct bucket *b, u32 hash,
                                      unsigned long flags)
  {
+       hash = hash & HASHTAB_MAP_LOCK_MASK;
        if (htab_use_raw_lock(htab))
                raw_spin_unlock_irqrestore(&b->raw_lock, flags);
        else
                spin_unlock_irqrestore(&b->lock, flags);
+       __this_cpu_dec(*(htab->map_locked[hash]));
+       migrate_enable();
  }
  
  static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);
@@@ -390,17 -415,11 +415,11 @@@ static int htab_map_alloc_check(union b
            attr->value_size == 0)
                return -EINVAL;
  
-       if (attr->key_size > MAX_BPF_STACK)
-               /* eBPF programs initialize keys on stack, so they cannot be
-                * larger than max stack size
-                */
-               return -E2BIG;
-       if (attr->value_size >= KMALLOC_MAX_SIZE -
-           MAX_BPF_STACK - sizeof(struct htab_elem))
-               /* if value_size is bigger, the user space won't be able to
-                * access the elements via bpf syscall. This check also makes
-                * sure that the elem_size doesn't overflow and it's
+       if ((u64)attr->key_size + attr->value_size >= KMALLOC_MAX_SIZE -
+          sizeof(struct htab_elem))
+               /* if key_size + value_size is bigger, the user space won't be
+                * able to access the elements via bpf syscall. This check
+                * also makes sure that the elem_size doesn't overflow and it's
                 * kmalloc-able later in htab_map_update_elem()
                 */
                return -E2BIG;
@@@ -422,13 -441,15 +441,15 @@@ static struct bpf_map *htab_map_alloc(u
        bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
        bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
        struct bpf_htab *htab;
+       int err, i;
        u64 cost;
-       int err;
  
        htab = kzalloc(sizeof(*htab), GFP_USER);
        if (!htab)
                return ERR_PTR(-ENOMEM);
  
+       lockdep_register_key(&htab->lockdep_key);
        bpf_map_init_from_attr(&htab->map, attr);
  
        if (percpu_lru) {
        if (!htab->buckets)
                goto free_charge;
  
+       for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) {
+               htab->map_locked[i] = __alloc_percpu_gfp(sizeof(int),
+                                                        sizeof(int), GFP_USER);
+               if (!htab->map_locked[i])
+                       goto free_map_locked;
+       }
        if (htab->map.map_flags & BPF_F_ZERO_SEED)
                htab->hashrnd = 0;
        else
        if (prealloc) {
                err = prealloc_init(htab);
                if (err)
-                       goto free_buckets;
+                       goto free_map_locked;
  
                if (!percpu && !lru) {
                        /* lru itself can remove the least used element, so
  
  free_prealloc:
        prealloc_destroy(htab);
- free_buckets:
+ free_map_locked:
+       for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
+               free_percpu(htab->map_locked[i]);
        bpf_map_area_free(htab->buckets);
  free_charge:
        bpf_map_charge_finish(&htab->map.memory);
  free_htab:
+       lockdep_unregister_key(&htab->lockdep_key);
        kfree(htab);
        return ERR_PTR(err);
  }
@@@ -687,12 -718,15 +718,15 @@@ static bool htab_lru_map_delete_node(vo
        struct hlist_nulls_node *n;
        unsigned long flags;
        struct bucket *b;
+       int ret;
  
        tgt_l = container_of(node, struct htab_elem, lru_node);
        b = __select_bucket(htab, tgt_l->hash);
        head = &b->head;
  
-       flags = htab_lock_bucket(htab, b);
+       ret = htab_lock_bucket(htab, b, tgt_l->hash, &flags);
+       if (ret)
+               return false;
  
        hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
                if (l == tgt_l) {
                        break;
                }
  
-       htab_unlock_bucket(htab, b, flags);
+       htab_unlock_bucket(htab, b, tgt_l->hash, flags);
  
        return l == tgt_l;
  }
@@@ -821,32 -855,6 +855,32 @@@ static void pcpu_copy_value(struct bpf_
        }
  }
  
 +static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
 +                          void *value, bool onallcpus)
 +{
 +      /* When using prealloc and not setting the initial value on all cpus,
 +       * zero-fill element values for other cpus (just as what happens when
 +       * not using prealloc). Otherwise, bpf program has no way to ensure
 +       * known initial values for cpus other than current one
 +       * (onallcpus=false always when coming from bpf prog).
 +       */
 +      if (htab_is_prealloc(htab) && !onallcpus) {
 +              u32 size = round_up(htab->map.value_size, 8);
 +              int current_cpu = raw_smp_processor_id();
 +              int cpu;
 +
 +              for_each_possible_cpu(cpu) {
 +                      if (cpu == current_cpu)
 +                              bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value,
 +                                              size);
 +                      else
 +                              memset(per_cpu_ptr(pptr, cpu), 0, size);
 +              }
 +      } else {
 +              pcpu_copy_value(htab, pptr, value, onallcpus);
 +      }
 +}
 +
  static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
  {
        return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS &&
@@@ -917,7 -925,7 +951,7 @@@ static struct htab_elem *alloc_htab_ele
                        }
                }
  
 -              pcpu_copy_value(htab, pptr, value, onallcpus);
 +              pcpu_init_value(htab, pptr, value, onallcpus);
  
                if (!prealloc)
                        htab_elem_set_ptr(l_new, key_size, pptr);
@@@ -998,7 -1006,9 +1032,9 @@@ static int htab_map_update_elem(struct 
                 */
        }
  
-       flags = htab_lock_bucket(htab, b);
+       ret = htab_lock_bucket(htab, b, hash, &flags);
+       if (ret)
+               return ret;
  
        l_old = lookup_elem_raw(head, hash, key, key_size);
  
        }
        ret = 0;
  err:
-       htab_unlock_bucket(htab, b, flags);
+       htab_unlock_bucket(htab, b, hash, flags);
        return ret;
  }
  
@@@ -1077,7 -1087,9 +1113,9 @@@ static int htab_lru_map_update_elem(str
                return -ENOMEM;
        memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size);
  
-       flags = htab_lock_bucket(htab, b);
+       ret = htab_lock_bucket(htab, b, hash, &flags);
+       if (ret)
+               return ret;
  
        l_old = lookup_elem_raw(head, hash, key, key_size);
  
        ret = 0;
  
  err:
-       htab_unlock_bucket(htab, b, flags);
+       htab_unlock_bucket(htab, b, hash, flags);
  
        if (ret)
                bpf_lru_push_free(&htab->lru, &l_new->lru_node);
@@@ -1131,7 -1143,9 +1169,9 @@@ static int __htab_percpu_map_update_ele
        b = __select_bucket(htab, hash);
        head = &b->head;
  
-       flags = htab_lock_bucket(htab, b);
+       ret = htab_lock_bucket(htab, b, hash, &flags);
+       if (ret)
+               return ret;
  
        l_old = lookup_elem_raw(head, hash, key, key_size);
  
        }
        ret = 0;
  err:
-       htab_unlock_bucket(htab, b, flags);
+       htab_unlock_bucket(htab, b, hash, flags);
        return ret;
  }
  
@@@ -1194,7 -1208,9 +1234,9 @@@ static int __htab_lru_percpu_map_update
                        return -ENOMEM;
        }
  
-       flags = htab_lock_bucket(htab, b);
+       ret = htab_lock_bucket(htab, b, hash, &flags);
+       if (ret)
+               return ret;
  
        l_old = lookup_elem_raw(head, hash, key, key_size);
  
                pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
                                value, onallcpus);
        } else {
 -              pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
 +              pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
                                value, onallcpus);
                hlist_nulls_add_head_rcu(&l_new->hash_node, head);
                l_new = NULL;
        }
        ret = 0;
  err:
-       htab_unlock_bucket(htab, b, flags);
+       htab_unlock_bucket(htab, b, hash, flags);
        if (l_new)
                bpf_lru_push_free(&htab->lru, &l_new->lru_node);
        return ret;
@@@ -1244,7 -1260,7 +1286,7 @@@ static int htab_map_delete_elem(struct 
        struct htab_elem *l;
        unsigned long flags;
        u32 hash, key_size;
-       int ret = -ENOENT;
+       int ret;
  
        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
  
        b = __select_bucket(htab, hash);
        head = &b->head;
  
-       flags = htab_lock_bucket(htab, b);
+       ret = htab_lock_bucket(htab, b, hash, &flags);
+       if (ret)
+               return ret;
  
        l = lookup_elem_raw(head, hash, key, key_size);
  
        if (l) {
                hlist_nulls_del_rcu(&l->hash_node);
                free_htab_elem(htab, l);
-               ret = 0;
+       } else {
+               ret = -ENOENT;
        }
  
-       htab_unlock_bucket(htab, b, flags);
+       htab_unlock_bucket(htab, b, hash, flags);
        return ret;
  }
  
@@@ -1276,7 -1295,7 +1321,7 @@@ static int htab_lru_map_delete_elem(str
        struct htab_elem *l;
        unsigned long flags;
        u32 hash, key_size;
-       int ret = -ENOENT;
+       int ret;
  
        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
  
        b = __select_bucket(htab, hash);
        head = &b->head;
  
-       flags = htab_lock_bucket(htab, b);
+       ret = htab_lock_bucket(htab, b, hash, &flags);
+       if (ret)
+               return ret;
  
        l = lookup_elem_raw(head, hash, key, key_size);
  
-       if (l) {
+       if (l)
                hlist_nulls_del_rcu(&l->hash_node);
-               ret = 0;
-       }
+       else
+               ret = -ENOENT;
  
-       htab_unlock_bucket(htab, b, flags);
+       htab_unlock_bucket(htab, b, hash, flags);
        if (l)
                bpf_lru_push_free(&htab->lru, &l->lru_node);
        return ret;
@@@ -1321,6 -1342,7 +1368,7 @@@ static void delete_all_elements(struct 
  static void htab_map_free(struct bpf_map *map)
  {
        struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+       int i;
  
        /* bpf_free_used_maps() or close(map_fd) will trigger this map_free callback.
         * bpf_free_used_maps() is called after bpf prog is no longer executing.
  
        free_percpu(htab->extra_elems);
        bpf_map_area_free(htab->buckets);
+       for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
+               free_percpu(htab->map_locked[i]);
+       lockdep_unregister_key(&htab->lockdep_key);
        kfree(htab);
  }
  
@@@ -1441,8 -1466,11 +1492,11 @@@ again_nocopy
        b = &htab->buckets[batch];
        head = &b->head;
        /* do not grab the lock unless need it (bucket_cnt > 0). */
-       if (locked)
-               flags = htab_lock_bucket(htab, b);
+       if (locked) {
+               ret = htab_lock_bucket(htab, b, batch, &flags);
+               if (ret)
+                       goto next_batch;
+       }
  
        bucket_cnt = 0;
        hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
                /* Note that since bucket_cnt > 0 here, it is implicit
                 * that the locked was grabbed, so release it.
                 */
-               htab_unlock_bucket(htab, b, flags);
+               htab_unlock_bucket(htab, b, batch, flags);
                rcu_read_unlock();
                bpf_enable_instrumentation();
                goto after_loop;
                /* Note that since bucket_cnt > 0 here, it is implicit
                 * that the locked was grabbed, so release it.
                 */
-               htab_unlock_bucket(htab, b, flags);
+               htab_unlock_bucket(htab, b, batch, flags);
                rcu_read_unlock();
                bpf_enable_instrumentation();
                kvfree(keys);
                dst_val += value_size;
        }
  
-       htab_unlock_bucket(htab, b, flags);
+       htab_unlock_bucket(htab, b, batch, flags);
        locked = false;
  
        while (node_to_free) {
diff --combined lib/Kconfig.debug
@@@ -274,6 -274,15 +274,15 @@@ config DEBUG_INFO_BT
          Turning this on expects presence of pahole tool, which will convert
          DWARF type info into equivalent deduplicated BTF type info.
  
+ config PAHOLE_HAS_SPLIT_BTF
+       def_bool $(success, test `$(PAHOLE) --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/'` -ge "119")
+
+ config DEBUG_INFO_BTF_MODULES
+       def_bool y
+       depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF
+       help
+         Generate compact split BTF type information for kernel modules.
+
  config GDB_SCRIPTS
        bool "Provide GDB scripts for kernel debugging"
        help
@@@ -1870,7 -1879,6 +1879,7 @@@ config KCO
        depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS
        select DEBUG_FS
        select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC
 +      select SKB_EXTENSIONS if NET
        help
          KCOV exposes kernel code coverage information in a form suitable
          for coverage-guided fuzzing (randomized testing).
@@@ -2447,6 -2455,4 +2456,6 @@@ config HYPERV_TESTIN
  
  endmenu # "Kernel Testing and Coverage"
  
 +source "Documentation/Kconfig"
 +
  endmenu # Kernel hacking