Merge branch 'salted-string-hash'
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 28 Jul 2016 19:26:31 +0000 (12:26 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 28 Jul 2016 19:26:31 +0000 (12:26 -0700)
This changes the vfs dentry hashing to mix in the parent pointer at the
_beginning_ of the hash, rather than at the end.

That actually improves both the hash and the code generation, because we
can move more of the computation to the "static" part of the dcache
setup, and do less at lookup runtime.

It turns out that a lot of other hash users also really wanted to mix in
a base pointer as a 'salt' for the hash, and so the slightly extended
interface ends up working well for other cases too.

Users that want a string hash that is purely about the string pass in a
'salt' pointer of NULL.

* merge branch 'salted-string-hash':
  fs/dcache.c: Save one 32-bit multiply in dcache lookup
  vfs: make the string hashes salt the hash

1  2 
drivers/staging/lustre/lustre/llite/statahead.c
fs/autofs4/waitq.c
fs/cifs/dir.c
fs/dcache.c
fs/fuse/dir.c
fs/logfs/dir.c
fs/nfs/dir.c
net/core/dev.c

   *
   * You should have received a copy of the GNU General Public License
   * version 2 along with this program; If not, see
 - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 - *
 - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 - * CA 95054 USA or visit www.sun.com if you need additional information or
 - * have any questions.
 + * http://www.gnu.org/licenses/gpl-2.0.html
   *
   * GPL HEADER END
   */
@@@ -170,7 -174,8 +170,8 @@@ static inline int is_omitted_entry(stru
   * Insert it into sai_entries tail when init.
   */
  static struct ll_sa_entry *
- ll_sa_entry_alloc(struct ll_statahead_info *sai, __u64 index,
+ ll_sa_entry_alloc(struct dentry *parent,
+                 struct ll_statahead_info *sai, __u64 index,
                  const char *name, int len)
  {
        struct ll_inode_info *lli;
        dname = (char *)entry + sizeof(struct ll_sa_entry);
        memcpy(dname, name, len);
        dname[len] = 0;
-       entry->se_qstr.hash = full_name_hash(name, len);
+       entry->se_qstr.hash = full_name_hash(parent, name, len);
        entry->se_qstr.len = len;
        entry->se_qstr.name = dname;
  
@@@ -646,7 -652,7 +648,7 @@@ static void ll_post_statahead(struct ll
                }
        }
  
 -      it->d.lustre.it_lock_handle = entry->se_handle;
 +      it->it_lock_handle = entry->se_handle;
        rc = md_revalidate_lock(ll_i2mdexp(dir), it, ll_inode2fid(dir), NULL);
        if (rc != 1) {
                rc = -EAGAIN;
@@@ -700,7 -706,7 +702,7 @@@ static int ll_statahead_interpret(struc
                 * process enqueues lock on child with parent lock held, eg.
                 * unlink.
                 */
 -              handle = it->d.lustre.it_lock_handle;
 +              handle = it->it_lock_handle;
                ll_intent_drop_lock(it);
        }
  
@@@ -850,7 -856,7 +852,7 @@@ static int do_sa_revalidate(struct inod
  {
        struct inode         *inode = d_inode(dentry);
        struct lookup_intent      it = { .it_op = IT_GETATTR,
 -                                       .d.lustre.it_lock_handle = 0 };
 +                                       .it_lock_handle = 0 };
        struct md_enqueue_info   *minfo;
        struct ldlm_enqueue_info *einfo;
        int rc;
        rc = md_revalidate_lock(ll_i2mdexp(dir), &it, ll_inode2fid(inode),
                                NULL);
        if (rc == 1) {
 -              entry->se_handle = it.d.lustre.it_lock_handle;
 +              entry->se_handle = it.it_lock_handle;
                ll_intent_release(&it);
                return 1;
        }
@@@ -898,7 -904,7 +900,7 @@@ static void ll_statahead_one(struct den
        int                    rc;
        int                    rc1;
  
-       entry = ll_sa_entry_alloc(sai, sai->sai_index, entry_name,
+       entry = ll_sa_entry_alloc(parent, sai, sai->sai_index, entry_name,
                                  entry_name_len);
        if (IS_ERR(entry))
                return;
@@@ -1569,7 -1575,7 +1571,7 @@@ int do_statahead_enter(struct inode *di
                if (entry->se_stat == SA_ENTRY_SUCC && entry->se_inode) {
                        struct inode *inode = entry->se_inode;
                        struct lookup_intent it = { .it_op = IT_GETATTR,
 -                                                  .d.lustre.it_lock_handle =
 +                                                  .it_lock_handle =
                                                     entry->se_handle };
                        __u64 bits;
  
diff --combined fs/autofs4/waitq.c
@@@ -66,12 -66,11 +66,12 @@@ static int autofs4_write(struct autofs_
        set_fs(KERNEL_DS);
  
        mutex_lock(&sbi->pipe_mutex);
 -      wr = __vfs_write(file, data, bytes, &file->f_pos);
 -      while (bytes && wr) {
 +      while (bytes) {
 +              wr = __vfs_write(file, data, bytes, &file->f_pos);
 +              if (wr <= 0)
 +                      break;
                data += wr;
                bytes -= wr;
 -              wr = __vfs_write(file, data, bytes, &file->f_pos);
        }
        mutex_unlock(&sbi->pipe_mutex);
  
@@@ -398,7 -397,7 +398,7 @@@ int autofs4_wait(struct autofs_sb_info 
                }
        }
        qstr.name = name;
-       qstr.hash = full_name_hash(name, qstr.len);
+       qstr.hash = full_name_hash(dentry, name, qstr.len);
  
        if (mutex_lock_interruptible(&sbi->wq_mutex)) {
                kfree(qstr.name);
diff --combined fs/cifs/dir.c
@@@ -445,7 -445,7 +445,7 @@@ cifs_atomic_open(struct inode *inode, s
                 * Check for hashed negative dentry. We have already revalidated
                 * the dentry and it is fine. No need to perform another lookup.
                 */
 -              if (!d_unhashed(direntry))
 +              if (!d_in_lookup(direntry))
                        return -ENOENT;
  
                res = cifs_lookup(inode, direntry, 0);
@@@ -856,7 -856,7 +856,7 @@@ static int cifs_ci_hash(const struct de
        wchar_t c;
        int i, charlen;
  
-       hash = init_name_hash();
+       hash = init_name_hash(dentry);
        for (i = 0; i < q->len; i += charlen) {
                charlen = codepage->char2uni(&q->name[i], q->len - i, &c);
                /* error out if we can't convert the character */
diff --combined fs/dcache.c
@@@ -104,11 -104,9 +104,9 @@@ static unsigned int d_hash_shift __read
  
  static struct hlist_bl_head *dentry_hashtable __read_mostly;
  
- static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
-                                       unsigned int hash)
+ static inline struct hlist_bl_head *d_hash(unsigned int hash)
  {
-       hash += (unsigned long) parent / L1_CACHE_BYTES;
-       return dentry_hashtable + hash_32(hash, d_hash_shift);
+       return dentry_hashtable + (hash >> (32 - d_hash_shift));
  }
  
  #define IN_LOOKUP_SHIFT 10
@@@ -488,7 -486,7 +486,7 @@@ void __d_drop(struct dentry *dentry
                if (unlikely(IS_ROOT(dentry)))
                        b = &dentry->d_sb->s_anon;
                else
-                       b = d_hash(dentry->d_parent, dentry->d_name.hash);
+                       b = d_hash(dentry->d_name.hash);
  
                hlist_bl_lock(b);
                __hlist_bl_del(&dentry->d_hash);
@@@ -507,44 -505,6 +505,44 @@@ void d_drop(struct dentry *dentry
  }
  EXPORT_SYMBOL(d_drop);
  
 +static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent)
 +{
 +      struct dentry *next;
 +      /*
 +       * Inform d_walk() and shrink_dentry_list() that we are no longer
 +       * attached to the dentry tree
 +       */
 +      dentry->d_flags |= DCACHE_DENTRY_KILLED;
 +      if (unlikely(list_empty(&dentry->d_child)))
 +              return;
 +      __list_del_entry(&dentry->d_child);
 +      /*
 +       * Cursors can move around the list of children.  While we'd been
 +       * a normal list member, it didn't matter - ->d_child.next would've
 +       * been updated.  However, from now on it won't be and for the
 +       * things like d_walk() it might end up with a nasty surprise.
 +       * Normally d_walk() doesn't care about cursors moving around -
 +       * ->d_lock on parent prevents that and since a cursor has no children
 +       * of its own, we get through it without ever unlocking the parent.
 +       * There is one exception, though - if we ascend from a child that
 +       * gets killed as soon as we unlock it, the next sibling is found
 +       * using the value left in its ->d_child.next.  And if _that_
 +       * pointed to a cursor, and cursor got moved (e.g. by lseek())
 +       * before d_walk() regains parent->d_lock, we'll end up skipping
 +       * everything the cursor had been moved past.
 +       *
 +       * Solution: make sure that the pointer left behind in ->d_child.next
 +       * points to something that won't be moving around.  I.e. skip the
 +       * cursors.
 +       */
 +      while (dentry->d_child.next != &parent->d_subdirs) {
 +              next = list_entry(dentry->d_child.next, struct dentry, d_child);
 +              if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR)))
 +                      break;
 +              dentry->d_child.next = next->d_child.next;
 +      }
 +}
 +
  static void __dentry_kill(struct dentry *dentry)
  {
        struct dentry *parent = NULL;
        }
        /* if it was on the hash then remove it */
        __d_drop(dentry);
 -      __list_del_entry(&dentry->d_child);
 -      /*
 -       * Inform d_walk() that we are no longer attached to the
 -       * dentry tree
 -       */
 -      dentry->d_flags |= DCACHE_DENTRY_KILLED;
 +      dentry_unlist(dentry, parent);
        if (parent)
                spin_unlock(&parent->d_lock);
        dentry_iput(dentry);
@@@ -1236,9 -1201,6 +1234,9 @@@ resume
                struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
                next = tmp->next;
  
 +              if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR))
 +                      continue;
 +
                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
  
                ret = enter(data, dentry);
@@@ -1687,16 -1649,6 +1685,16 @@@ struct dentry *d_alloc(struct dentry * 
  }
  EXPORT_SYMBOL(d_alloc);
  
 +struct dentry *d_alloc_cursor(struct dentry * parent)
 +{
 +      struct dentry *dentry = __d_alloc(parent->d_sb, NULL);
 +      if (dentry) {
 +              dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR;
 +              dentry->d_parent = dget(parent);
 +      }
 +      return dentry;
 +}
 +
  /**
   * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems)
   * @sb: the superblock
@@@ -1716,7 -1668,7 +1714,7 @@@ struct dentry *d_alloc_name(struct dent
        struct qstr q;
  
        q.name = name;
-       q.hash_len = hashlen_string(name);
+       q.hash_len = hashlen_string(parent, name);
        return d_alloc(parent, &q);
  }
  EXPORT_SYMBOL(d_alloc_name);
@@@ -2140,7 -2092,7 +2138,7 @@@ struct dentry *__d_lookup_rcu(const str
  {
        u64 hashlen = name->hash_len;
        const unsigned char *str = name->name;
-       struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen));
+       struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
        struct hlist_bl_node *node;
        struct dentry *dentry;
  
@@@ -2257,7 -2209,7 +2255,7 @@@ struct dentry *__d_lookup(const struct 
        unsigned int len = name->len;
        unsigned int hash = name->hash;
        const unsigned char *str = name->name;
-       struct hlist_bl_head *b = d_hash(parent, hash);
+       struct hlist_bl_head *b = d_hash(hash);
        struct hlist_bl_node *node;
        struct dentry *found = NULL;
        struct dentry *dentry;
@@@ -2337,7 -2289,7 +2335,7 @@@ struct dentry *d_hash_and_lookup(struc
         * calculate the standard hash first, as the d_op->d_hash()
         * routine may choose to leave the hash value unchanged.
         */
-       name->hash = full_name_hash(name->name, name->len);
+       name->hash = full_name_hash(dir, name->name, name->len);
        if (dir->d_flags & DCACHE_OP_HASH) {
                int err = dir->d_op->d_hash(dir, name);
                if (unlikely(err < 0))
@@@ -2410,7 -2362,7 +2408,7 @@@ static void __d_rehash(struct dentry * 
  
  static void _d_rehash(struct dentry * entry)
  {
-       __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
+       __d_rehash(entry, d_hash(entry->d_name.hash));
  }
  
  /**
@@@ -2503,6 -2455,7 +2501,6 @@@ retry
                rcu_read_unlock();
                goto retry;
        }
 -      rcu_read_unlock();
        /*
         * No changes for the parent since the beginning of d_lookup().
         * Since all removals from the chain happen with hlist_bl_lock(),
                        continue;
                if (dentry->d_parent != parent)
                        continue;
 -              if (d_unhashed(dentry))
 -                      continue;
                if (parent->d_flags & DCACHE_OP_COMPARE) {
                        int tlen = dentry->d_name.len;
                        const char *tname = dentry->d_name.name;
                        if (dentry_cmp(dentry, str, len))
                                continue;
                }
 -              dget(dentry);
                hlist_bl_unlock(b);
 -              /* somebody is doing lookup for it right now; wait for it */
 +              /* now we can try to grab a reference */
 +              if (!lockref_get_not_dead(&dentry->d_lockref)) {
 +                      rcu_read_unlock();
 +                      goto retry;
 +              }
 +
 +              rcu_read_unlock();
 +              /*
 +               * somebody is likely to be still doing lookup for it;
 +               * wait for them to finish
 +               */
                spin_lock(&dentry->d_lock);
                d_wait_lookup(dentry);
                /*
                dput(new);
                return dentry;
        }
 +      rcu_read_unlock();
        /* we can't take ->d_lock here; it's OK, though. */
        new->d_flags |= DCACHE_PAR_LOOKUP;
        new->d_wait = wq;
@@@ -2874,7 -2819,7 +2872,7 @@@ static void __d_move(struct dentry *den
         * for the same hash queue because of how unlikely it is.
         */
        __d_drop(dentry);
-       __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
+       __d_rehash(dentry, d_hash(target->d_name.hash));
  
        /*
         * Unhash the target (d_delete() is not usable here).  If exchanging
         */
        __d_drop(target);
        if (exchange) {
-               __d_rehash(target,
-                          d_hash(dentry->d_parent, dentry->d_name.hash));
+               __d_rehash(target, d_hash(dentry->d_name.hash));
        }
  
        /* Switch the names.. */
diff --combined fs/fuse/dir.c
@@@ -341,10 -341,8 +341,10 @@@ static struct dentry *fuse_lookup(struc
        struct dentry *newent;
        bool outarg_valid = true;
  
 +      fuse_lock_inode(dir);
        err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
                               &outarg, &inode);
 +      fuse_unlock_inode(dir);
        if (err == -ENOENT) {
                outarg_valid = false;
                err = 0;
@@@ -480,7 -478,7 +480,7 @@@ static int fuse_atomic_open(struct inod
        struct fuse_conn *fc = get_fuse_conn(dir);
        struct dentry *res = NULL;
  
 -      if (d_unhashed(entry)) {
 +      if (d_in_lookup(entry)) {
                res = fuse_lookup(dir, entry, 0);
                if (IS_ERR(res))
                        return PTR_ERR(res);
@@@ -955,6 -953,7 +955,7 @@@ int fuse_reverse_inval_entry(struct sup
        if (!dir)
                goto unlock;
  
+       name->hash = full_name_hash(dir, name->name, name->len);
        entry = d_lookup(dir, name);
        dput(dir);
        if (!entry)
@@@ -1204,7 -1203,7 +1205,7 @@@ static int fuse_direntplus_link(struct 
  
        fc = get_fuse_conn(dir);
  
-       name.hash = full_name_hash(name.name, name.len);
+       name.hash = full_name_hash(parent, name.name, name.len);
        dentry = d_lookup(parent, &name);
        if (!dentry) {
  retry:
@@@ -1343,9 -1342,7 +1344,9 @@@ static int fuse_readdir(struct file *fi
                fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
                               FUSE_READDIR);
        }
 +      fuse_lock_inode(inode);
        fuse_request_send(fc, req);
 +      fuse_unlock_inode(inode);
        nbytes = req->out.args[0].size;
        err = req->out.h.error;
        fuse_put_request(fc, req);
diff --combined fs/logfs/dir.c
@@@ -95,7 -95,7 +95,7 @@@ static int beyond_eof(struct inode *ino
   * of each character and pick a prime nearby, preferably a bit-sparse
   * one.
   */
--static u32 hash_32(const char *s, int len, u32 seed)
++static u32 logfs_hash_32(const char *s, int len, u32 seed)
  {
        u32 hash = seed;
        int i;
@@@ -159,7 -159,7 +159,7 @@@ static struct page *logfs_get_dd_page(s
        struct qstr *name = &dentry->d_name;
        struct page *page;
        struct logfs_disk_dentry *dd;
--      u32 hash = hash_32(name->name, name->len, 0);
++      u32 hash = logfs_hash_32(name->name, name->len, 0);
        pgoff_t index;
        int round;
  
@@@ -370,7 -370,7 +370,7 @@@ static int logfs_write_dir(struct inod
  {
        struct page *page;
        struct logfs_disk_dentry *dd;
--      u32 hash = hash_32(dentry->d_name.name, dentry->d_name.len, 0);
++      u32 hash = logfs_hash_32(dentry->d_name.name, dentry->d_name.len, 0);
        pgoff_t index;
        int round, err;
  
diff --combined fs/nfs/dir.c
@@@ -232,7 -232,7 +232,7 @@@ int nfs_readdir_make_qstr(struct qstr *
         * in a page cache page which kmemleak does not scan.
         */
        kmemleak_not_leak(string->name);
-       string->hash = full_name_hash(name, len);
+       string->hash = full_name_hash(NULL, name, len);
        return 0;
  }
  
@@@ -424,17 -424,12 +424,17 @@@ static int xdr_decode(nfs_readdir_descr
  static
  int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
  {
 +      struct inode *inode;
        struct nfs_inode *nfsi;
  
        if (d_really_is_negative(dentry))
                return 0;
  
 -      nfsi = NFS_I(d_inode(dentry));
 +      inode = d_inode(dentry);
 +      if (is_bad_inode(inode) || NFS_STALE(inode))
 +              return 0;
 +
 +      nfsi = NFS_I(inode);
        if (entry->fattr->fileid == nfsi->fileid)
                return 1;
        if (nfs_compare_fh(entry->fh, &nfsi->fh) == 0)
@@@ -502,7 -497,7 +502,7 @@@ void nfs_prime_dcache(struct dentry *pa
                if (filename.len == 2 && filename.name[1] == '.')
                        return;
        }
-       filename.hash = full_name_hash(filename.name, filename.len);
+       filename.hash = full_name_hash(parent, filename.name, filename.len);
  
        dentry = d_lookup(parent, &filename);
  again:
@@@ -1368,6 -1363,7 +1368,6 @@@ EXPORT_SYMBOL_GPL(nfs_dentry_operations
  struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
  {
        struct dentry *res;
 -      struct dentry *parent;
        struct inode *inode = NULL;
        struct nfs_fh *fhandle = NULL;
        struct nfs_fattr *fattr = NULL;
        if (IS_ERR(label))
                goto out;
  
 -      parent = dentry->d_parent;
        /* Protect against concurrent sillydeletes */
        trace_nfs_lookup_enter(dir, dentry, flags);
        error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
@@@ -1485,13 -1482,11 +1485,13 @@@ int nfs_atomic_open(struct inode *dir, 
                    struct file *file, unsigned open_flags,
                    umode_t mode, int *opened)
  {
 +      DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
        struct nfs_open_context *ctx;
        struct dentry *res;
        struct iattr attr = { .ia_valid = ATTR_OPEN };
        struct inode *inode;
        unsigned int lookup_flags = 0;
 +      bool switched = false;
        int err;
  
        /* Expect a negative dentry */
  
        /* NFS only supports OPEN on regular files */
        if ((open_flags & O_DIRECTORY)) {
 -              if (!d_unhashed(dentry)) {
 +              if (!d_in_lookup(dentry)) {
                        /*
                         * Hashed negative dentry with O_DIRECTORY: dentry was
                         * revalidated and is fine, no need to perform lookup
                attr.ia_size = 0;
        }
  
 +      if (!(open_flags & O_CREAT) && !d_in_lookup(dentry)) {
 +              d_drop(dentry);
 +              switched = true;
 +              dentry = d_alloc_parallel(dentry->d_parent,
 +                                        &dentry->d_name, &wq);
 +              if (IS_ERR(dentry))
 +                      return PTR_ERR(dentry);
 +              if (unlikely(!d_in_lookup(dentry)))
 +                      return finish_no_open(file, dentry);
 +      }
 +
        ctx = create_nfs_open_context(dentry, open_flags);
        err = PTR_ERR(ctx);
        if (IS_ERR(ctx))
                err = PTR_ERR(inode);
                trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
                put_nfs_open_context(ctx);
 +              d_drop(dentry);
                switch (err) {
                case -ENOENT:
 -                      d_drop(dentry);
                        d_add(dentry, NULL);
                        nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
                        break;
        trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
        put_nfs_open_context(ctx);
  out:
 +      if (unlikely(switched)) {
 +              d_lookup_done(dentry);
 +              dput(dentry);
 +      }
        return err;
  
  no_open:
        res = nfs_lookup(dir, dentry, lookup_flags);
 -      err = PTR_ERR(res);
 +      if (switched) {
 +              d_lookup_done(dentry);
 +              if (!res)
 +                      res = dentry;
 +              else
 +                      dput(dentry);
 +      }
        if (IS_ERR(res))
 -              goto out;
 -
 +              return PTR_ERR(res);
        return finish_no_open(file, res);
  }
  EXPORT_SYMBOL_GPL(nfs_atomic_open);
diff --combined net/core/dev.c
@@@ -94,7 -94,6 +94,7 @@@
  #include <linux/ethtool.h>
  #include <linux/notifier.h>
  #include <linux/skbuff.h>
 +#include <linux/bpf.h>
  #include <net/net_namespace.h>
  #include <net/sock.h>
  #include <net/busy_poll.h>
  #include <linux/hrtimer.h>
  #include <linux/netfilter_ingress.h>
  #include <linux/sctp.h>
 +#include <linux/crash_dump.h>
  
  #include "net-sysfs.h"
  
@@@ -198,7 -196,7 +198,7 @@@ static inline void dev_base_seq_inc(str
  
  static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
  {
-       unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
+       unsigned int hash = full_name_hash(net, name, strnlen(name, IFNAMSIZ));
  
        return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
  }
@@@ -2251,12 -2249,11 +2251,12 @@@ EXPORT_SYMBOL(netif_set_real_num_rx_que
   */
  int netif_get_num_default_rss_queues(void)
  {
 -      return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
 +      return is_kdump_kernel() ?
 +              1 : min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
  }
  EXPORT_SYMBOL(netif_get_num_default_rss_queues);
  
 -static inline void __netif_reschedule(struct Qdisc *q)
 +static void __netif_reschedule(struct Qdisc *q)
  {
        struct softnet_data *sd;
        unsigned long flags;
@@@ -2423,7 -2420,7 +2423,7 @@@ EXPORT_SYMBOL(__skb_tx_hash)
  
  static void skb_warn_bad_offload(const struct sk_buff *skb)
  {
 -      static const netdev_features_t null_features = 0;
 +      static const netdev_features_t null_features;
        struct net_device *dev = skb->dev;
        const char *name = "";
  
@@@ -3071,7 -3068,6 +3071,7 @@@ static inline int __dev_xmit_skb(struc
                                 struct netdev_queue *txq)
  {
        spinlock_t *root_lock = qdisc_lock(q);
 +      struct sk_buff *to_free = NULL;
        bool contended;
        int rc;
  
        /*
         * Heuristic to force contended enqueues to serialize on a
         * separate lock before trying to get qdisc main lock.
 -       * This permits __QDISC___STATE_RUNNING owner to get the lock more
 +       * This permits qdisc->running owner to get the lock more
         * often and dequeue packets faster.
         */
        contended = qdisc_is_running(q);
  
        spin_lock(root_lock);
        if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
 -              kfree_skb(skb);
 +              __qdisc_drop(skb, &to_free);
                rc = NET_XMIT_DROP;
        } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
                   qdisc_run_begin(q)) {
  
                rc = NET_XMIT_SUCCESS;
        } else {
 -              rc = q->enqueue(skb, q) & NET_XMIT_MASK;
 +              rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
                if (qdisc_run_begin(q)) {
                        if (unlikely(contended)) {
                                spin_unlock(&q->busylock);
                }
        }
        spin_unlock(root_lock);
 +      if (unlikely(to_free))
 +              kfree_skb_list(to_free);
        if (unlikely(contended))
                spin_unlock(&q->busylock);
        return rc;
@@@ -3148,6 -3142,8 +3148,6 @@@ static void skb_update_prio(struct sk_b
  DEFINE_PER_CPU(int, xmit_recursion);
  EXPORT_SYMBOL(xmit_recursion);
  
 -#define RECURSION_LIMIT 10
 -
  /**
   *    dev_loopback_xmit - loop back @skb
   *    @net: network namespace this loopback is happening in
@@@ -3390,8 -3386,8 +3390,8 @@@ static int __dev_queue_xmit(struct sk_b
                int cpu = smp_processor_id(); /* ok because BHs are off */
  
                if (txq->xmit_lock_owner != cpu) {
 -
 -                      if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
 +                      if (unlikely(__this_cpu_read(xmit_recursion) >
 +                                   XMIT_RECURSION_LIMIT))
                                goto recursion_alert;
  
                        skb = validate_xmit_skb(skb, dev);
@@@ -3902,14 -3898,22 +3902,14 @@@ static void net_tx_action(struct softir
                        head = head->next_sched;
  
                        root_lock = qdisc_lock(q);
 -                      if (spin_trylock(root_lock)) {
 -                              smp_mb__before_atomic();
 -                              clear_bit(__QDISC_STATE_SCHED,
 -                                        &q->state);
 -                              qdisc_run(q);
 -                              spin_unlock(root_lock);
 -                      } else {
 -                              if (!test_bit(__QDISC_STATE_DEACTIVATED,
 -                                            &q->state)) {
 -                                      __netif_reschedule(q);
 -                              } else {
 -                                      smp_mb__before_atomic();
 -                                      clear_bit(__QDISC_STATE_SCHED,
 -                                                &q->state);
 -                              }
 -                      }
 +                      spin_lock(root_lock);
 +                      /* We need to make sure head->next_sched is read
 +                       * before clearing __QDISC_STATE_SCHED
 +                       */
 +                      smp_mb__before_atomic();
 +                      clear_bit(__QDISC_STATE_SCHED, &q->state);
 +                      qdisc_run(q);
 +                      spin_unlock(root_lock);
                }
        }
  }
@@@ -4973,7 -4977,7 +4973,7 @@@ bool sk_busy_loop(struct sock *sk, int 
  
                        if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
                                rc = napi->poll(napi, BUSY_POLL_BUDGET);
 -                              trace_napi_poll(napi);
 +                              trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
                                if (rc == BUSY_POLL_BUDGET) {
                                        napi_complete_done(napi, rc);
                                        napi_schedule(napi);
@@@ -5129,7 -5133,7 +5129,7 @@@ static int napi_poll(struct napi_struc
        work = 0;
        if (test_bit(NAPI_STATE_SCHED, &n->state)) {
                work = n->poll(n, weight);
 -              trace_napi_poll(n);
 +              trace_napi_poll(n, work, weight);
        }
  
        WARN_ON_ONCE(work > weight);
@@@ -5445,52 -5449,6 +5445,52 @@@ void *netdev_lower_get_next(struct net_
  }
  EXPORT_SYMBOL(netdev_lower_get_next);
  
 +/**
 + * netdev_all_lower_get_next - Get the next device from all lower neighbour list
 + * @dev: device
 + * @iter: list_head ** of the current position
 + *
 + * Gets the next netdev_adjacent from the dev's all lower neighbour
 + * list, starting from iter position. The caller must hold RTNL lock or
 + * its own locking that guarantees that the neighbour all lower
 + * list will remain unchanged.
 + */
 +struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter)
 +{
 +      struct netdev_adjacent *lower;
 +
 +      lower = list_entry(*iter, struct netdev_adjacent, list);
 +
 +      if (&lower->list == &dev->all_adj_list.lower)
 +              return NULL;
 +
 +      *iter = lower->list.next;
 +
 +      return lower->dev;
 +}
 +EXPORT_SYMBOL(netdev_all_lower_get_next);
 +
 +/**
 + * netdev_all_lower_get_next_rcu - Get the next device from all
 + *                               lower neighbour list, RCU variant
 + * @dev: device
 + * @iter: list_head ** of the current position
 + *
 + * Gets the next netdev_adjacent from the dev's all lower neighbour
 + * list, starting from iter position. The caller must hold RCU read lock.
 + */
 +struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
 +                                               struct list_head **iter)
 +{
 +      struct netdev_adjacent *lower;
 +
 +      lower = list_first_or_null_rcu(&dev->all_adj_list.lower,
 +                                     struct netdev_adjacent, list);
 +
 +      return lower ? lower->dev : NULL;
 +}
 +EXPORT_SYMBOL(netdev_all_lower_get_next_rcu);
 +
  /**
   * netdev_lower_get_first_private_rcu - Get the first ->private from the
   *                                   lower neighbour list, RCU
@@@ -5961,7 -5919,7 +5961,7 @@@ static void netdev_adjacent_add_links(s
        struct net *net = dev_net(dev);
  
        list_for_each_entry(iter, &dev->adj_list.upper, list) {
 -              if (!net_eq(net,dev_net(iter->dev)))
 +              if (!net_eq(net, dev_net(iter->dev)))
                        continue;
                netdev_adjacent_sysfs_add(iter->dev, dev,
                                          &iter->dev->adj_list.lower);
        }
  
        list_for_each_entry(iter, &dev->adj_list.lower, list) {
 -              if (!net_eq(net,dev_net(iter->dev)))
 +              if (!net_eq(net, dev_net(iter->dev)))
                        continue;
                netdev_adjacent_sysfs_add(iter->dev, dev,
                                          &iter->dev->adj_list.upper);
@@@ -5986,7 -5944,7 +5986,7 @@@ static void netdev_adjacent_del_links(s
        struct net *net = dev_net(dev);
  
        list_for_each_entry(iter, &dev->adj_list.upper, list) {
 -              if (!net_eq(net,dev_net(iter->dev)))
 +              if (!net_eq(net, dev_net(iter->dev)))
                        continue;
                netdev_adjacent_sysfs_del(iter->dev, dev->name,
                                          &iter->dev->adj_list.lower);
        }
  
        list_for_each_entry(iter, &dev->adj_list.lower, list) {
 -              if (!net_eq(net,dev_net(iter->dev)))
 +              if (!net_eq(net, dev_net(iter->dev)))
                        continue;
                netdev_adjacent_sysfs_del(iter->dev, dev->name,
                                          &iter->dev->adj_list.upper);
@@@ -6011,7 -5969,7 +6011,7 @@@ void netdev_adjacent_rename_links(struc
        struct net *net = dev_net(dev);
  
        list_for_each_entry(iter, &dev->adj_list.upper, list) {
 -              if (!net_eq(net,dev_net(iter->dev)))
 +              if (!net_eq(net, dev_net(iter->dev)))
                        continue;
                netdev_adjacent_sysfs_del(iter->dev, oldname,
                                          &iter->dev->adj_list.lower);
        }
  
        list_for_each_entry(iter, &dev->adj_list.lower, list) {
 -              if (!net_eq(net,dev_net(iter->dev)))
 +              if (!net_eq(net, dev_net(iter->dev)))
                        continue;
                netdev_adjacent_sysfs_del(iter->dev, oldname,
                                          &iter->dev->adj_list.upper);
@@@ -6088,50 -6046,6 +6088,50 @@@ void netdev_lower_state_changed(struct 
  }
  EXPORT_SYMBOL(netdev_lower_state_changed);
  
 +int netdev_default_l2upper_neigh_construct(struct net_device *dev,
 +                                         struct neighbour *n)
 +{
 +      struct net_device *lower_dev, *stop_dev;
 +      struct list_head *iter;
 +      int err;
 +
 +      netdev_for_each_lower_dev(dev, lower_dev, iter) {
 +              if (!lower_dev->netdev_ops->ndo_neigh_construct)
 +                      continue;
 +              err = lower_dev->netdev_ops->ndo_neigh_construct(lower_dev, n);
 +              if (err) {
 +                      stop_dev = lower_dev;
 +                      goto rollback;
 +              }
 +      }
 +      return 0;
 +
 +rollback:
 +      netdev_for_each_lower_dev(dev, lower_dev, iter) {
 +              if (lower_dev == stop_dev)
 +                      break;
 +              if (!lower_dev->netdev_ops->ndo_neigh_destroy)
 +                      continue;
 +              lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
 +      }
 +      return err;
 +}
 +EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_construct);
 +
 +void netdev_default_l2upper_neigh_destroy(struct net_device *dev,
 +                                        struct neighbour *n)
 +{
 +      struct net_device *lower_dev;
 +      struct list_head *iter;
 +
 +      netdev_for_each_lower_dev(dev, lower_dev, iter) {
 +              if (!lower_dev->netdev_ops->ndo_neigh_destroy)
 +                      continue;
 +              lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
 +      }
 +}
 +EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_destroy);
 +
  static void dev_change_rx_flags(struct net_device *dev, int flags)
  {
        const struct net_device_ops *ops = dev->netdev_ops;
@@@ -6615,38 -6529,6 +6615,38 @@@ int dev_change_proto_down(struct net_de
  }
  EXPORT_SYMBOL(dev_change_proto_down);
  
 +/**
 + *    dev_change_xdp_fd - set or clear a bpf program for a device rx path
 + *    @dev: device
 + *    @fd: new program fd or negative value to clear
 + *
 + *    Set or clear a bpf program for a device
 + */
 +int dev_change_xdp_fd(struct net_device *dev, int fd)
 +{
 +      const struct net_device_ops *ops = dev->netdev_ops;
 +      struct bpf_prog *prog = NULL;
 +      struct netdev_xdp xdp = {};
 +      int err;
 +
 +      if (!ops->ndo_xdp)
 +              return -EOPNOTSUPP;
 +      if (fd >= 0) {
 +              prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
 +              if (IS_ERR(prog))
 +                      return PTR_ERR(prog);
 +      }
 +
 +      xdp.command = XDP_SETUP_PROG;
 +      xdp.prog = prog;
 +      err = ops->ndo_xdp(dev, &xdp);
 +      if (err < 0 && prog)
 +              bpf_prog_put(prog);
 +
 +      return err;
 +}
 +EXPORT_SYMBOL(dev_change_xdp_fd);
 +
  /**
   *    dev_new_index   -       allocate an ifindex
   *    @net: the applicable net namespace