Merge tag 'block-5.13-2021-05-07' of git://git.kernel.dk/linux-block
diff --git a/net/core/dev.c b/net/core/dev.c
index 1f79b9a..222b1d3 100644
@@ -848,6 +848,52 @@ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
 
+static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
+{
+       int k = stack->num_paths++;
+
+       if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
+               return NULL;
+
+       return &stack->path[k];
+}
+
+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
+                         struct net_device_path_stack *stack)
+{
+       const struct net_device *last_dev;
+       struct net_device_path_ctx ctx = {
+               .dev    = dev,
+               .daddr  = daddr,
+       };
+       struct net_device_path *path;
+       int ret = 0;
+
+       stack->num_paths = 0;
+       while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
+               last_dev = ctx.dev;
+               path = dev_fwd_path(stack);
+               if (!path)
+                       return -1;
+
+               memset(path, 0, sizeof(struct net_device_path));
+               ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
+               if (ret < 0)
+                       return -1;
+
+               if (WARN_ON_ONCE(last_dev == ctx.dev))
+                       return -1;
+       }
+       path = dev_fwd_path(stack);
+       if (!path)
+               return -1;
+       path->type = DEV_PATH_ETHERNET;
+       path->dev = ctx.dev;
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(dev_fill_forward_path);
+
 /**
  *     __dev_get_by_name       - find a device by its name
  *     @net: the applicable net namespace
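dev_fill_forward_path() walks the ndo_fill_forward_path hops and terminates the stack with a DEV_PATH_ETHERNET entry for the final device. A minimal usage sketch, not part of this commit (the function name is illustrative; RTNL/RCU requirements of real callers are omitted):

/* Sketch only: resolve and dump the forwarding path towards a destination MAC.
 * Relies solely on the API added above; the -1 return covers both stack
 * overflow and a hop reporting an error.
 */
static void example_dump_forward_path(struct net_device *dev, const u8 *daddr)
{
	struct net_device_path_stack stack;
	int i;

	if (dev_fill_forward_path(dev, daddr, &stack) < 0)
		return;

	for (i = 0; i < stack.num_paths; i++)
		pr_debug("hop %d: type %d via %s\n", i, stack.path[i].type,
			 stack.path[i].dev ? stack.path[i].dev->name : "(none)");
}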
@@ -2463,16 +2509,14 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
 EXPORT_SYMBOL(netdev_txq_to_tc);
 
 #ifdef CONFIG_XPS
-struct static_key xps_needed __read_mostly;
-EXPORT_SYMBOL(xps_needed);
-struct static_key xps_rxqs_needed __read_mostly;
-EXPORT_SYMBOL(xps_rxqs_needed);
+static struct static_key xps_needed __read_mostly;
+static struct static_key xps_rxqs_needed __read_mostly;
 static DEFINE_MUTEX(xps_map_mutex);
 #define xmap_dereference(P)            \
        rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
 
 static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
-                            int tci, u16 index)
+                            struct xps_dev_maps *old_maps, int tci, u16 index)
 {
        struct xps_map *map = NULL;
        int pos;
@@ -2491,6 +2535,8 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
                        break;
                }
 
+               if (old_maps)
+                       RCU_INIT_POINTER(old_maps->attr_map[tci], NULL);
                RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
                kfree_rcu(map, rcu);
                return false;
@@ -2503,7 +2549,7 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
                                 struct xps_dev_maps *dev_maps,
                                 int cpu, u16 offset, u16 count)
 {
-       int num_tc = dev->num_tc ? : 1;
+       int num_tc = dev_maps->num_tc;
        bool active = false;
        int tci;
 
@@ -2511,7 +2557,7 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
                int i, j;
 
                for (i = count, j = offset; i--; j++) {
-                       if (!remove_xps_queue(dev_maps, tci, j))
+                       if (!remove_xps_queue(dev_maps, NULL, tci, j))
                                break;
                }
 
@@ -2523,74 +2569,54 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
 
 static void reset_xps_maps(struct net_device *dev,
                           struct xps_dev_maps *dev_maps,
-                          bool is_rxqs_map)
+                          enum xps_map_type type)
 {
-       if (is_rxqs_map) {
-               static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
-               RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
-       } else {
-               RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
-       }
        static_key_slow_dec_cpuslocked(&xps_needed);
+       if (type == XPS_RXQS)
+               static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
+
+       RCU_INIT_POINTER(dev->xps_maps[type], NULL);
+
        kfree_rcu(dev_maps, rcu);
 }
 
-static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
-                          struct xps_dev_maps *dev_maps, unsigned int nr_ids,
-                          u16 offset, u16 count, bool is_rxqs_map)
+static void clean_xps_maps(struct net_device *dev, enum xps_map_type type,
+                          u16 offset, u16 count)
 {
+       struct xps_dev_maps *dev_maps;
        bool active = false;
        int i, j;
 
-       for (j = -1; j = netif_attrmask_next(j, mask, nr_ids),
-            j < nr_ids;)
-               active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
-                                              count);
+       dev_maps = xmap_dereference(dev->xps_maps[type]);
+       if (!dev_maps)
+               return;
+
+       for (j = 0; j < dev_maps->nr_ids; j++)
+               active |= remove_xps_queue_cpu(dev, dev_maps, j, offset, count);
        if (!active)
-               reset_xps_maps(dev, dev_maps, is_rxqs_map);
+               reset_xps_maps(dev, dev_maps, type);
 
-       if (!is_rxqs_map) {
-               for (i = offset + (count - 1); count--; i--) {
+       if (type == XPS_CPUS) {
+               for (i = offset + (count - 1); count--; i--)
                        netdev_queue_numa_node_write(
-                               netdev_get_tx_queue(dev, i),
-                               NUMA_NO_NODE);
-               }
+                               netdev_get_tx_queue(dev, i), NUMA_NO_NODE);
        }
 }
 
 static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
                                   u16 count)
 {
-       const unsigned long *possible_mask = NULL;
-       struct xps_dev_maps *dev_maps;
-       unsigned int nr_ids;
-
        if (!static_key_false(&xps_needed))
                return;
 
        cpus_read_lock();
        mutex_lock(&xps_map_mutex);
 
-       if (static_key_false(&xps_rxqs_needed)) {
-               dev_maps = xmap_dereference(dev->xps_rxqs_map);
-               if (dev_maps) {
-                       nr_ids = dev->num_rx_queues;
-                       clean_xps_maps(dev, possible_mask, dev_maps, nr_ids,
-                                      offset, count, true);
-               }
-       }
-
-       dev_maps = xmap_dereference(dev->xps_cpus_map);
-       if (!dev_maps)
-               goto out_no_maps;
+       if (static_key_false(&xps_rxqs_needed))
+               clean_xps_maps(dev, XPS_RXQS, offset, count);
 
-       if (num_possible_cpus() > 1)
-               possible_mask = cpumask_bits(cpu_possible_mask);
-       nr_ids = nr_cpu_ids;
-       clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count,
-                      false);
+       clean_xps_maps(dev, XPS_CPUS, offset, count);
 
-out_no_maps:
        mutex_unlock(&xps_map_mutex);
        cpus_read_unlock();
 }
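For reference, the xps_map_type values indexing dev->xps_maps[] above are assumed (from the include/linux/netdevice.h side of this rework) to be:

enum xps_map_type {
	XPS_CPUS = 0,
	XPS_RXQS,
	XPS_MAPS_MAX,	/* assumed array-size terminator for dev->xps_maps[] */
};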
@@ -2640,16 +2666,35 @@ static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
        return new_map;
 }
 
+/* Copy xps maps at a given index */
+static void xps_copy_dev_maps(struct xps_dev_maps *dev_maps,
+                             struct xps_dev_maps *new_dev_maps, int index,
+                             int tc, bool skip_tc)
+{
+       int i, tci = index * dev_maps->num_tc;
+       struct xps_map *map;
+
+       /* copy maps belonging to foreign traffic classes */
+       for (i = 0; i < dev_maps->num_tc; i++, tci++) {
+               if (i == tc && skip_tc)
+                       continue;
+
+               /* fill in the new device map from the old device map */
+               map = xmap_dereference(dev_maps->attr_map[tci]);
+               RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
+       }
+}
+
 /* Must be called under cpus_read_lock */
 int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
-                         u16 index, bool is_rxqs_map)
+                         u16 index, enum xps_map_type type)
 {
-       const unsigned long *online_mask = NULL, *possible_mask = NULL;
-       struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
+       struct xps_dev_maps *dev_maps, *new_dev_maps = NULL, *old_dev_maps = NULL;
+       const unsigned long *online_mask = NULL;
+       bool active = false, copy = false;
        int i, j, tci, numa_node_id = -2;
        int maps_sz, num_tc = 1, tc = 0;
        struct xps_map *map, *new_map;
-       bool active = false;
        unsigned int nr_ids;
 
        if (dev->num_tc) {
@@ -2667,38 +2712,48 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
        }
 
        mutex_lock(&xps_map_mutex);
-       if (is_rxqs_map) {
+
+       dev_maps = xmap_dereference(dev->xps_maps[type]);
+       if (type == XPS_RXQS) {
                maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
-               dev_maps = xmap_dereference(dev->xps_rxqs_map);
                nr_ids = dev->num_rx_queues;
        } else {
                maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
-               if (num_possible_cpus() > 1) {
+               if (num_possible_cpus() > 1)
                        online_mask = cpumask_bits(cpu_online_mask);
-                       possible_mask = cpumask_bits(cpu_possible_mask);
-               }
-               dev_maps = xmap_dereference(dev->xps_cpus_map);
                nr_ids = nr_cpu_ids;
        }
 
        if (maps_sz < L1_CACHE_BYTES)
                maps_sz = L1_CACHE_BYTES;
 
+       /* The old dev_maps could be larger or smaller than the one we're
+        * setting up now, as dev->num_tc or nr_ids could have been updated in
+        * between. We could try to be smart, but let's be safe instead and only
+        * copy foreign traffic classes if the two map sizes match.
+        */
+       if (dev_maps &&
+           dev_maps->num_tc == num_tc && dev_maps->nr_ids == nr_ids)
+               copy = true;
+
        /* allocate memory for queue storage */
        for (j = -1; j = netif_attrmask_next_and(j, online_mask, mask, nr_ids),
             j < nr_ids;) {
-               if (!new_dev_maps)
-                       new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
                if (!new_dev_maps) {
-                       mutex_unlock(&xps_map_mutex);
-                       return -ENOMEM;
+                       new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
+                       if (!new_dev_maps) {
+                               mutex_unlock(&xps_map_mutex);
+                               return -ENOMEM;
+                       }
+
+                       new_dev_maps->nr_ids = nr_ids;
+                       new_dev_maps->num_tc = num_tc;
                }
 
                tci = j * num_tc + tc;
-               map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
-                                NULL;
+               map = copy ? xmap_dereference(dev_maps->attr_map[tci]) : NULL;
 
-               map = expand_xps_map(map, j, index, is_rxqs_map);
+               map = expand_xps_map(map, j, index, type == XPS_RXQS);
                if (!map)
                        goto error;
 
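The tci arithmetic in this and the following hunks relies on attr_map[] being a flat [nr_ids][num_tc] array; a one-line illustration (the helper name is hypothetical):

/* Illustration only: the entry for CPU/rx-queue j and traffic class tc sits
 * at a flat index, matching "tci = j * num_tc + tc" above.
 */
static inline int xps_flat_index(unsigned int j, int num_tc, int tc)
{
	return j * num_tc + tc;
}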
@@ -2711,29 +2766,21 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
        if (!dev_maps) {
                /* Increment static keys at most once per type */
                static_key_slow_inc_cpuslocked(&xps_needed);
-               if (is_rxqs_map)
+               if (type == XPS_RXQS)
                        static_key_slow_inc_cpuslocked(&xps_rxqs_needed);
        }
 
-       for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
-            j < nr_ids;) {
-               /* copy maps belonging to foreign traffic classes */
-               for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
-                       /* fill in the new device map from the old device map */
-                       map = xmap_dereference(dev_maps->attr_map[tci]);
-                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
-               }
+       for (j = 0; j < nr_ids; j++) {
+               bool skip_tc = false;
 
-               /* We need to explicitly update tci as prevous loop
-                * could break out early if dev_maps is NULL.
-                */
                tci = j * num_tc + tc;
-
                if (netif_attr_test_mask(j, mask, nr_ids) &&
                    netif_attr_test_online(j, online_mask, nr_ids)) {
                        /* add tx-queue to CPU/rx-queue maps */
                        int pos = 0;
 
+                       skip_tc = true;
+
                        map = xmap_dereference(new_dev_maps->attr_map[tci]);
                        while ((pos < map->len) && (map->queues[pos] != index))
                                pos++;
@@ -2741,78 +2788,81 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
                        if (pos == map->len)
                                map->queues[map->len++] = index;
 #ifdef CONFIG_NUMA
-                       if (!is_rxqs_map) {
+                       if (type == XPS_CPUS) {
                                if (numa_node_id == -2)
                                        numa_node_id = cpu_to_node(j);
                                else if (numa_node_id != cpu_to_node(j))
                                        numa_node_id = -1;
                        }
 #endif
-               } else if (dev_maps) {
-                       /* fill in the new device map from the old device map */
-                       map = xmap_dereference(dev_maps->attr_map[tci]);
-                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
                }
 
-               /* copy maps belonging to foreign traffic classes */
-               for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
-                       /* fill in the new device map from the old device map */
-                       map = xmap_dereference(dev_maps->attr_map[tci]);
-                       RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
-               }
+               if (copy)
+                       xps_copy_dev_maps(dev_maps, new_dev_maps, j, tc,
+                                         skip_tc);
        }
 
-       if (is_rxqs_map)
-               rcu_assign_pointer(dev->xps_rxqs_map, new_dev_maps);
-       else
-               rcu_assign_pointer(dev->xps_cpus_map, new_dev_maps);
+       rcu_assign_pointer(dev->xps_maps[type], new_dev_maps);
 
        /* Cleanup old maps */
        if (!dev_maps)
                goto out_no_old_maps;
 
-       for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
-            j < nr_ids;) {
-               for (i = num_tc, tci = j * num_tc; i--; tci++) {
-                       new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
+       for (j = 0; j < dev_maps->nr_ids; j++) {
+               for (i = num_tc, tci = j * dev_maps->num_tc; i--; tci++) {
                        map = xmap_dereference(dev_maps->attr_map[tci]);
-                       if (map && map != new_map)
-                               kfree_rcu(map, rcu);
+                       if (!map)
+                               continue;
+
+                       if (copy) {
+                               new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
+                               if (map == new_map)
+                                       continue;
+                       }
+
+                       RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
+                       kfree_rcu(map, rcu);
                }
        }
 
-       kfree_rcu(dev_maps, rcu);
+       old_dev_maps = dev_maps;
 
 out_no_old_maps:
        dev_maps = new_dev_maps;
        active = true;
 
 out_no_new_maps:
-       if (!is_rxqs_map) {
+       if (type == XPS_CPUS)
                /* update Tx queue numa node */
                netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
                                             (numa_node_id >= 0) ?
                                             numa_node_id : NUMA_NO_NODE);
-       }
 
        if (!dev_maps)
                goto out_no_maps;
 
        /* removes tx-queue from unused CPUs/rx-queues */
-       for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
-            j < nr_ids;) {
-               for (i = tc, tci = j * num_tc; i--; tci++)
-                       active |= remove_xps_queue(dev_maps, tci, index);
-               if (!netif_attr_test_mask(j, mask, nr_ids) ||
-                   !netif_attr_test_online(j, online_mask, nr_ids))
-                       active |= remove_xps_queue(dev_maps, tci, index);
-               for (i = num_tc - tc, tci++; --i; tci++)
-                       active |= remove_xps_queue(dev_maps, tci, index);
+       for (j = 0; j < dev_maps->nr_ids; j++) {
+               tci = j * dev_maps->num_tc;
+
+               for (i = 0; i < dev_maps->num_tc; i++, tci++) {
+                       if (i == tc &&
+                           netif_attr_test_mask(j, mask, dev_maps->nr_ids) &&
+                           netif_attr_test_online(j, online_mask, dev_maps->nr_ids))
+                               continue;
+
+                       active |= remove_xps_queue(dev_maps,
+                                                  copy ? old_dev_maps : NULL,
+                                                  tci, index);
+               }
        }
 
+       if (old_dev_maps)
+               kfree_rcu(old_dev_maps, rcu);
+
        /* free map if not active */
        if (!active)
-               reset_xps_maps(dev, dev_maps, is_rxqs_map);
+               reset_xps_maps(dev, dev_maps, type);
 
 out_no_maps:
        mutex_unlock(&xps_map_mutex);
@@ -2820,11 +2870,10 @@ out_no_maps:
        return 0;
 error:
        /* remove any maps that we added */
-       for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
-            j < nr_ids;) {
+       for (j = 0; j < nr_ids; j++) {
                for (i = num_tc, tci = j * num_tc; i--; tci++) {
                        new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
-                       map = dev_maps ?
+                       map = copy ?
                              xmap_dereference(dev_maps->attr_map[tci]) :
                              NULL;
                        if (new_map && new_map != map)
@@ -2845,7 +2894,7 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
        int ret;
 
        cpus_read_lock();
-       ret =  __netif_set_xps_queue(dev, cpumask_bits(mask), index, false);
+       ret =  __netif_set_xps_queue(dev, cpumask_bits(mask), index, XPS_CPUS);
        cpus_read_unlock();
 
        return ret;
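netif_set_xps_queue() keeps its public prototype; only the internal type argument changes. A hedged driver-side sketch of a typical call (queue/CPU choice and error handling are illustrative):

/* Sketch only: pin TX queue qid of dev to a single CPU. Real drivers usually
 * derive the mask from IRQ affinity.
 */
static void example_pin_txq_to_cpu(struct net_device *dev, u16 qid, int cpu)
{
	cpumask_var_t mask;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return;

	cpumask_set_cpu(cpu, mask);
	netif_set_xps_queue(dev, mask, qid);
	free_cpumask_var(mask);
}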
@@ -3956,13 +4005,15 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
                               struct xps_dev_maps *dev_maps, unsigned int tci)
 {
+       int tc = netdev_get_prio_tc_map(dev, skb->priority);
        struct xps_map *map;
        int queue_index = -1;
 
-       if (dev->num_tc) {
-               tci *= dev->num_tc;
-               tci += netdev_get_prio_tc_map(dev, skb->priority);
-       }
+       if (tc >= dev_maps->num_tc || tci >= dev_maps->nr_ids)
+               return queue_index;
+
+       tci *= dev_maps->num_tc;
+       tci += tc;
 
        map = rcu_dereference(dev_maps->attr_map[tci]);
        if (map) {
@@ -3993,18 +4044,18 @@ static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev,
        if (!static_key_false(&xps_rxqs_needed))
                goto get_cpus_map;
 
-       dev_maps = rcu_dereference(sb_dev->xps_rxqs_map);
+       dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_RXQS]);
        if (dev_maps) {
                int tci = sk_rx_queue_get(sk);
 
-               if (tci >= 0 && tci < dev->num_rx_queues)
+               if (tci >= 0)
                        queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
                                                          tci);
        }
 
 get_cpus_map:
        if (queue_index < 0) {
-               dev_maps = rcu_dereference(sb_dev->xps_cpus_map);
+               dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_CPUS]);
                if (dev_maps) {
                        unsigned int tci = skb->sender_cpu - 1;
 
@@ -4672,10 +4723,10 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
        void *orig_data, *orig_data_end, *hard_start;
        struct netdev_rx_queue *rxqueue;
        u32 metalen, act = XDP_DROP;
+       bool orig_bcast, orig_host;
        u32 mac_len, frame_sz;
        __be16 orig_eth_type;
        struct ethhdr *eth;
-       bool orig_bcast;
        int off;
 
        /* Reinjected packets coming from act_mirred or similar should
@@ -4722,6 +4773,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
        orig_data_end = xdp->data_end;
        orig_data = xdp->data;
        eth = (struct ethhdr *)xdp->data;
+       orig_host = ether_addr_equal_64bits(eth->h_dest, skb->dev->dev_addr);
        orig_bcast = is_multicast_ether_addr_64bits(eth->h_dest);
        orig_eth_type = eth->h_proto;
 
@@ -4749,8 +4801,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
        /* check if XDP changed eth hdr such SKB needs update */
        eth = (struct ethhdr *)xdp->data;
        if ((orig_eth_type != eth->h_proto) ||
+           (orig_host != ether_addr_equal_64bits(eth->h_dest,
+                                                 skb->dev->dev_addr)) ||
            (orig_bcast != is_multicast_ether_addr_64bits(eth->h_dest))) {
                __skb_push(skb, ETH_HLEN);
+               skb->pkt_type = PACKET_HOST;
                skb->protocol = eth_type_trans(skb, skb->dev);
        }
 
@@ -5284,6 +5339,7 @@ skip_classify:
                        goto another_round;
                case RX_HANDLER_EXACT:
                        deliver_exact = true;
+                       break;
                case RX_HANDLER_PASS:
                        break;
                default:
@@ -5876,15 +5932,13 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
-static struct list_head *gro_list_prepare(struct napi_struct *napi,
-                                         struct sk_buff *skb)
+static void gro_list_prepare(const struct list_head *head,
+                            const struct sk_buff *skb)
 {
        unsigned int maclen = skb->dev->hard_header_len;
        u32 hash = skb_get_hash_raw(skb);
-       struct list_head *head;
        struct sk_buff *p;
 
-       head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)].list;
        list_for_each_entry(p, head, list) {
                unsigned long diffs;
 
@@ -5910,11 +5964,9 @@ static struct list_head *gro_list_prepare(struct napi_struct *napi,
                                       maclen);
                NAPI_GRO_CB(p)->same_flow = !diffs;
        }
-
-       return head;
 }
 
-static void skb_gro_reset_offset(struct sk_buff *skb)
+static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff)
 {
        const struct skb_shared_info *pinfo = skb_shinfo(skb);
        const skb_frag_t *frag0 = &pinfo->frags[0];
@@ -5925,7 +5977,7 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
 
        if (!skb_headlen(skb) && pinfo->nr_frags &&
            !PageHighMem(skb_frag_page(frag0)) &&
-           (!NET_IP_ALIGN || !(skb_frag_off(frag0) & 3))) {
+           (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) {
                NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
                NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
                                                    skb_frag_size(frag0),
@@ -5975,11 +6027,11 @@ static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)
 
 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
-       u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
+       u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
+       struct gro_list *gro_list = &napi->gro_hash[bucket];
        struct list_head *head = &offload_base;
        struct packet_offload *ptype;
        __be16 type = skb->protocol;
-       struct list_head *gro_head;
        struct sk_buff *pp = NULL;
        enum gro_result ret;
        int same_flow;
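The gro_list pointer introduced above refers to the existing per-bucket bookkeeping structure; for context it is assumed to look like this (declared in include/linux/netdevice.h):

struct gro_list {
	struct list_head	list;
	int			count;
};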
@@ -5988,7 +6040,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
        if (netif_elide_gro(skb->dev))
                goto normal;
 
-       gro_head = gro_list_prepare(napi, skb);
+       gro_list_prepare(&gro_list->list, skb);
 
        rcu_read_lock();
        list_for_each_entry_rcu(ptype, head, list) {
@@ -6024,7 +6076,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 
                pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
                                        ipv6_gro_receive, inet_gro_receive,
-                                       gro_head, skb);
+                                       &gro_list->list, skb);
                break;
        }
        rcu_read_unlock();
@@ -6043,7 +6095,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
        if (pp) {
                skb_list_del_init(pp);
                napi_gro_complete(napi, pp);
-               napi->gro_hash[hash].count--;
+               gro_list->count--;
        }
 
        if (same_flow)
@@ -6052,16 +6104,16 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
        if (NAPI_GRO_CB(skb)->flush)
                goto normal;
 
-       if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
-               gro_flush_oldest(napi, gro_head);
-       } else {
-               napi->gro_hash[hash].count++;
-       }
+       if (unlikely(gro_list->count >= MAX_GRO_SKBS))
+               gro_flush_oldest(napi, &gro_list->list);
+       else
+               gro_list->count++;
+
        NAPI_GRO_CB(skb)->count = 1;
        NAPI_GRO_CB(skb)->age = jiffies;
        NAPI_GRO_CB(skb)->last = skb;
        skb_shinfo(skb)->gso_size = skb_gro_len(skb);
-       list_add(&skb->list, gro_head);
+       list_add(&skb->list, &gro_list->list);
        ret = GRO_HELD;
 
 pull:
@@ -6069,11 +6121,11 @@ pull:
        if (grow > 0)
                gro_pull_from_frag0(skb, grow);
 ok:
-       if (napi->gro_hash[hash].count) {
-               if (!test_bit(hash, &napi->gro_bitmask))
-                       __set_bit(hash, &napi->gro_bitmask);
-       } else if (test_bit(hash, &napi->gro_bitmask)) {
-               __clear_bit(hash, &napi->gro_bitmask);
+       if (gro_list->count) {
+               if (!test_bit(bucket, &napi->gro_bitmask))
+                       __set_bit(bucket, &napi->gro_bitmask);
+       } else if (test_bit(bucket, &napi->gro_bitmask)) {
+               __clear_bit(bucket, &napi->gro_bitmask);
        }
 
        return ret;
@@ -6143,7 +6195,7 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
        skb_mark_napi_id(skb, napi);
        trace_napi_gro_receive_entry(skb);
 
-       skb_gro_reset_offset(skb);
+       skb_gro_reset_offset(skb, 0);
 
        ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb));
        trace_napi_gro_receive_exit(ret);
@@ -6232,7 +6284,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
        napi->skb = NULL;
 
        skb_reset_mac_header(skb);
-       skb_gro_reset_offset(skb);
+       skb_gro_reset_offset(skb, hlen);
 
        if (unlikely(skb_gro_header_hard(skb, hlen))) {
                eth = skb_gro_header_slow(skb, hlen, 0);
@@ -6790,6 +6842,7 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
 
        return err;
 }
+EXPORT_SYMBOL(dev_set_threaded);
 
 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
                    int (*poll)(struct napi_struct *, int), int weight)
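Exporting dev_set_threaded() lets drivers opt a device into threaded NAPI themselves. A hedged sketch of such a call at probe time (whether RTNL must be held here is an assumption, carried over from the sysfs path):

/* Sketch only: switch all of dev's NAPI instances to threaded polling. */
static int example_enable_threaded_napi(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_set_threaded(dev, true);
	rtnl_unlock();

	return err;
}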
@@ -10338,14 +10391,20 @@ EXPORT_SYMBOL(register_netdev);
 
 int netdev_refcnt_read(const struct net_device *dev)
 {
+#ifdef CONFIG_PCPU_DEV_REFCNT
        int i, refcnt = 0;
 
        for_each_possible_cpu(i)
                refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
        return refcnt;
+#else
+       return refcount_read(&dev->dev_refcnt);
+#endif
 }
 EXPORT_SYMBOL(netdev_refcnt_read);
 
+int netdev_unregister_timeout_secs __read_mostly = 10;
+
 #define WAIT_REFS_MIN_MSECS 1
 #define WAIT_REFS_MAX_MSECS 250
 /**
@@ -10370,7 +10429,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
        rebroadcast_time = warning_time = jiffies;
        refcnt = netdev_refcnt_read(dev);
 
-       while (refcnt != 0) {
+       while (refcnt != 1) {
                if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
                        rtnl_lock();
 
@@ -10407,7 +10466,9 @@ static void netdev_wait_allrefs(struct net_device *dev)
 
                refcnt = netdev_refcnt_read(dev);
 
-               if (refcnt && time_after(jiffies, warning_time + 10 * HZ)) {
+               if (refcnt != 1 &&
+                   time_after(jiffies, warning_time +
+                              netdev_unregister_timeout_secs * HZ)) {
                        pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
                                 dev->name, refcnt);
                        warning_time = jiffies;
@@ -10483,7 +10544,7 @@ void netdev_run_todo(void)
                netdev_wait_allrefs(dev);
 
                /* paranoia */
-               BUG_ON(netdev_refcnt_read(dev));
+               BUG_ON(netdev_refcnt_read(dev) != 1);
                BUG_ON(!list_empty(&dev->ptype_all));
                BUG_ON(!list_empty(&dev->ptype_specific));
                WARN_ON(rcu_access_pointer(dev->ip_ptr));
@@ -10700,9 +10761,14 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
        dev = PTR_ALIGN(p, NETDEV_ALIGN);
        dev->padded = (char *)dev - (char *)p;
 
+#ifdef CONFIG_PCPU_DEV_REFCNT
        dev->pcpu_refcnt = alloc_percpu(int);
        if (!dev->pcpu_refcnt)
                goto free_dev;
+       dev_hold(dev);
+#else
+       refcount_set(&dev->dev_refcnt, 1);
+#endif
 
        if (dev_addr_init(dev))
                goto free_pcpu;
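The dev_hold() taken at allocation above is what makes refcnt == 1 the fully released state in netdev_wait_allrefs(). For context, the companion dev_hold()/dev_put() helpers are assumed to branch on the same config option, roughly:

/* Assumed netdevice.h counterparts (not part of this file's diff). */
static inline void dev_hold(struct net_device *dev)
{
#ifdef CONFIG_PCPU_DEV_REFCNT
	this_cpu_inc(*dev->pcpu_refcnt);
#else
	refcount_inc(&dev->dev_refcnt);
#endif
}

static inline void dev_put(struct net_device *dev)
{
#ifdef CONFIG_PCPU_DEV_REFCNT
	this_cpu_dec(*dev->pcpu_refcnt);
#else
	refcount_dec(&dev->dev_refcnt);
#endif
}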
@@ -10766,8 +10832,10 @@ free_all:
        return NULL;
 
 free_pcpu:
+#ifdef CONFIG_PCPU_DEV_REFCNT
        free_percpu(dev->pcpu_refcnt);
 free_dev:
+#endif
        netdev_freemem(dev);
        return NULL;
 }
@@ -10809,8 +10877,10 @@ void free_netdev(struct net_device *dev)
        list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
                netif_napi_del(p);
 
+#ifdef CONFIG_PCPU_DEV_REFCNT
        free_percpu(dev->pcpu_refcnt);
        dev->pcpu_refcnt = NULL;
+#endif
        free_percpu(dev->xdp_bulkq);
        dev->xdp_bulkq = NULL;
 
@@ -10998,11 +11068,13 @@ void unregister_netdev(struct net_device *dev)
 EXPORT_SYMBOL(unregister_netdev);
 
 /**
- *     dev_change_net_namespace - move device to different nethost namespace
+ *     __dev_change_net_namespace - move device to different nethost namespace
  *     @dev: device
  *     @net: network namespace
  *     @pat: If not NULL name pattern to try if the current device name
  *           is already taken in the destination network namespace.
+ *     @new_ifindex: If not zero, specifies device index in the target
+ *                   namespace.
  *
  *     This function shuts down a device interface and moves it
  *     to a new network namespace. On success 0 is returned, on
@@ -11011,10 +11083,11 @@ EXPORT_SYMBOL(unregister_netdev);
  *     Callers must hold the rtnl semaphore.
  */
 
-int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
+int __dev_change_net_namespace(struct net_device *dev, struct net *net,
+                              const char *pat, int new_ifindex)
 {
        struct net *net_old = dev_net(dev);
-       int err, new_nsid, new_ifindex;
+       int err, new_nsid;
 
        ASSERT_RTNL();
 
@@ -11045,6 +11118,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
                        goto out;
        }
 
+       /* Check that new_ifindex isn't used yet. */
+       err = -EBUSY;
+       if (new_ifindex && __dev_get_by_index(net, new_ifindex))
+               goto out;
+
        /*
         * And now a mini version of register_netdevice unregister_netdevice.
         */
@@ -11072,10 +11150,12 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 
        new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
        /* If there is an ifindex conflict assign a new one */
-       if (__dev_get_by_index(net, dev->ifindex))
-               new_ifindex = dev_new_index(net);
-       else
-               new_ifindex = dev->ifindex;
+       if (!new_ifindex) {
+               if (__dev_get_by_index(net, dev->ifindex))
+                       new_ifindex = dev_new_index(net);
+               else
+                       new_ifindex = dev->ifindex;
+       }
 
        rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid,
                            new_ifindex);
@@ -11128,7 +11208,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 out:
        return err;
 }
-EXPORT_SYMBOL_GPL(dev_change_net_namespace);
+EXPORT_SYMBOL_GPL(__dev_change_net_namespace);
 
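With the rename, the original dev_change_net_namespace() entry point is assumed to live on as a thin wrapper (likely a static inline on the netdevice.h side) that passes 0 so the kernel keeps choosing the ifindex:

/* Assumed wrapper preserving the old API: new_ifindex == 0 retains the
 * "pick a new ifindex on conflict" behaviour shown in the hunk above.
 */
static inline int dev_change_net_namespace(struct net_device *dev,
					   struct net *net, const char *pat)
{
	return __dev_change_net_namespace(dev, net, pat, 0);
}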
 static int dev_cpu_dead(unsigned int oldcpu)
 {