net: add busy_poll device feature
diff --git a/net/core/dev.c b/net/core/dev.c
index 45fa2f1..75e88e0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1245,7 +1245,7 @@ static int __dev_open(struct net_device *dev)
         * If we don't do this there is a chance ndo_poll_controller
         * or ndo_poll may be running while we open the device
         */
-       netpoll_rx_disable(dev);
+       netpoll_poll_disable(dev);
 
        ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
        ret = notifier_to_errno(ret);
@@ -1260,7 +1260,7 @@ static int __dev_open(struct net_device *dev)
        if (!ret && ops->ndo_open)
                ret = ops->ndo_open(dev);
 
-       netpoll_rx_enable(dev);
+       netpoll_poll_enable(dev);
 
        if (ret)
                clear_bit(__LINK_STATE_START, &dev->state);
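
The old netpoll_rx_disable()/netpoll_rx_enable() pair only fenced off netpoll's receive hooks; the new netpoll_poll_disable()/netpoll_poll_enable() pair keeps netpoll from polling the device at all (ndo_poll_controller/ndo_poll) while it is half-open or half-closed. A sketch of what the helpers are assumed to look like in net/core/netpoll.c of this era, with a per-device dev_lock semaphore reached through SRCU (the authoritative bodies live there, not in this diff):

void netpoll_poll_disable(struct net_device *dev)
{
        struct netpoll_info *ni;
        int idx;

        might_sleep();
        idx = srcu_read_lock(&netpoll_srcu);
        ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
        if (ni)
                down(&ni->dev_lock);    /* waits for in-flight pollers */
        srcu_read_unlock(&netpoll_srcu, idx);
}

void netpoll_poll_enable(struct net_device *dev)
{
        struct netpoll_info *ni;

        rcu_read_lock();
        ni = rcu_dereference(dev->npinfo);
        if (ni)
                up(&ni->dev_lock);
        rcu_read_unlock();
}

Since __dev_open() may sleep in ndo_open(), the disable side has to be allowed to sleep as well, hence a semaphore rather than a spinlock.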
@@ -1313,6 +1313,9 @@ static int __dev_close_many(struct list_head *head)
        might_sleep();
 
        list_for_each_entry(dev, head, close_list) {
+               /* Temporarily disable netpoll until the interface is down */
+               netpoll_poll_disable(dev);
+
                call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
 
                clear_bit(__LINK_STATE_START, &dev->state);
@@ -1343,6 +1346,7 @@ static int __dev_close_many(struct list_head *head)
 
                dev->flags &= ~IFF_UP;
                net_dmaengine_put();
+               netpoll_poll_enable(dev);
        }
 
        return 0;
@@ -1353,14 +1357,10 @@ static int __dev_close(struct net_device *dev)
        int retval;
        LIST_HEAD(single);
 
-       /* Temporarily disable netpoll until the interface is down */
-       netpoll_rx_disable(dev);
-
        list_add(&dev->close_list, &single);
        retval = __dev_close_many(&single);
        list_del(&single);
 
-       netpoll_rx_enable(dev);
        return retval;
 }
 
@@ -1398,14 +1398,9 @@ int dev_close(struct net_device *dev)
        if (dev->flags & IFF_UP) {
                LIST_HEAD(single);
 
-               /* Block netpoll rx while the interface is going down */
-               netpoll_rx_disable(dev);
-
                list_add(&dev->close_list, &single);
                dev_close_many(&single);
                list_del(&single);
-
-               netpoll_rx_enable(dev);
        }
        return 0;
 }
@@ -1645,8 +1640,7 @@ static inline void net_timestamp_set(struct sk_buff *skb)
                        __net_timestamp(SKB);           \
        }                                               \
 
-static inline bool is_skb_forwardable(struct net_device *dev,
-                                     struct sk_buff *skb)
+bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb)
 {
        unsigned int len;
 
@@ -1665,6 +1659,7 @@ static inline bool is_skb_forwardable(struct net_device *dev,
 
        return false;
 }
+EXPORT_SYMBOL_GPL(is_skb_forwardable);
 
 /**
  * dev_forward_skb - loopback an skb to another netif
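
is_skb_forwardable() verifies that the destination device is up and that the frame (plus VLAN headroom) fits its MTU. Un-inlining and exporting it lets forwarding code outside net/core/dev.c reuse the same check. A hypothetical module-side caller, just to show the intended shape (my_forward() is illustrative, not part of this patch):

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Hypothetical forwarding hook reusing the newly exported check. */
static int my_forward(struct sk_buff *skb, struct net_device *to)
{
        if (unlikely(!is_skb_forwardable(to, skb))) {
                kfree_skb(skb);
                return NET_RX_DROP;
        }
        skb->dev = to;
        return dev_queue_xmit(skb);
}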
@@ -2885,6 +2880,7 @@ recursion_alert:
        rc = -ENETDOWN;
        rcu_read_unlock_bh();
 
+       atomic_long_inc(&dev->tx_dropped);
        kfree_skb(skb);
        return rc;
 out:
@@ -2957,7 +2953,7 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
                flow_table = rcu_dereference(rxqueue->rps_flow_table);
                if (!flow_table)
                        goto out;
-               flow_id = skb->rxhash & flow_table->mask;
+               flow_id = skb_get_hash(skb) & flow_table->mask;
                rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
                                                        rxq_index, flow_id);
                if (rc < 0)
@@ -2991,6 +2987,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
        struct rps_sock_flow_table *sock_flow_table;
        int cpu = -1;
        u16 tcpu;
+       u32 hash;
 
        if (skb_rx_queue_recorded(skb)) {
                u16 index = skb_get_rx_queue(skb);
@@ -3019,7 +3016,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
        }
 
        skb_reset_network_header(skb);
-       if (!skb_get_hash(skb))
+       hash = skb_get_hash(skb);
+       if (!hash)
                goto done;
 
        flow_table = rcu_dereference(rxqueue->rps_flow_table);
@@ -3028,11 +3026,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
                u16 next_cpu;
                struct rps_dev_flow *rflow;
 
-               rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
+               rflow = &flow_table->flows[hash & flow_table->mask];
                tcpu = rflow->cpu;
 
-               next_cpu = sock_flow_table->ents[skb->rxhash &
-                   sock_flow_table->mask];
+               next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask];
 
                /*
                 * If the desired CPU (where last recvmsg was done) is
@@ -3061,7 +3058,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
        }
 
        if (map) {
-               tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
+               tcpu = map->cpus[((u64) hash * map->len) >> 32];
 
                if (cpu_online(tcpu)) {
                        cpu = tcpu;
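
All three lookups above are driven by the same 32-bit flow hash, now fetched once via skb_get_hash(): the RPS/RFS flow tables are power-of-two sized and index with hash & table->mask, while the CPU map may have any length and uses the division-free multiply-shift ((u64)hash * len) >> 32 to scale the hash onto [0, len). A minimal user-space model of the two indexing schemes:

#include <stdint.h>
#include <stdio.h>

/* Power-of-two table: mask == size - 1, keep the low hash bits. */
static uint32_t table_slot(uint32_t hash, uint32_t mask)
{
        return hash & mask;
}

/* Arbitrary length: hash/2^32 is a fraction in [0,1); multiplying by
 * len and truncating picks a slot without an integer division. */
static uint32_t map_slot(uint32_t hash, uint32_t len)
{
        return (uint32_t)(((uint64_t)hash * len) >> 32);
}

int main(void)
{
        printf("%u\n", table_slot(0xdeadbeef, 4096 - 1));       /* 3823 */
        printf("%u\n", map_slot(0x80000000u, 6));               /* 3 */
        return 0;
}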
@@ -3236,10 +3233,6 @@ static int netif_rx_internal(struct sk_buff *skb)
 {
        int ret;
 
-       /* if netpoll wants it, pretend we never saw it */
-       if (netpoll_rx(skb))
-               return NET_RX_DROP;
-
        net_timestamp_check(netdev_tstamp_prequeue, skb);
 
        trace_netif_rx(skb);
@@ -3446,7 +3439,7 @@ out:
  *     @rx_handler: receive handler to register
  *     @rx_handler_data: data pointer that is used by rx handler
  *
- *     Register a receive hander for a device. This handler will then be
+ *     Register a receive handler for a device. This handler will then be
  *     called from __netif_receive_skb. A negative errno code is returned
  *     on a failure.
  *
@@ -3500,11 +3493,11 @@ EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
 static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
 {
        switch (skb->protocol) {
-       case __constant_htons(ETH_P_ARP):
-       case __constant_htons(ETH_P_IP):
-       case __constant_htons(ETH_P_IPV6):
-       case __constant_htons(ETH_P_8021Q):
-       case __constant_htons(ETH_P_8021AD):
+       case htons(ETH_P_ARP):
+       case htons(ETH_P_IP):
+       case htons(ETH_P_IPV6):
+       case htons(ETH_P_8021Q):
+       case htons(ETH_P_8021AD):
                return true;
        default:
                return false;
@@ -3525,10 +3518,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
 
        trace_netif_receive_skb(skb);
 
-       /* if we've gotten here through NAPI, check netpoll */
-       if (netpoll_receive_skb(skb))
-               goto out;
-
        orig_dev = skb->dev;
 
        skb_reset_network_header(skb);
@@ -3655,7 +3644,6 @@ drop:
 
 unlock:
        rcu_read_unlock();
-out:
        return ret;
 }
 
@@ -3845,10 +3833,10 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
                diffs |= p->vlan_tci ^ skb->vlan_tci;
                if (maclen == ETH_HLEN)
                        diffs |= compare_ether_header(skb_mac_header(p),
-                                                     skb_gro_mac_header(skb));
+                                                     skb_mac_header(skb));
                else if (!diffs)
                        diffs = memcmp(skb_mac_header(p),
-                                      skb_gro_mac_header(skb),
+                                      skb_mac_header(skb),
                                       maclen);
                NAPI_GRO_CB(p)->same_flow = !diffs;
        }
@@ -3871,6 +3859,27 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
        }
 }
 
+static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
+{
+       struct skb_shared_info *pinfo = skb_shinfo(skb);
+
+       BUG_ON(skb->end - skb->tail < grow);
+
+       memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
+
+       skb->data_len -= grow;
+       skb->tail += grow;
+
+       pinfo->frags[0].page_offset += grow;
+       skb_frag_size_sub(&pinfo->frags[0], grow);
+
+       if (unlikely(!skb_frag_size(&pinfo->frags[0]))) {
+               skb_frag_unref(skb, 0);
+               memmove(pinfo->frags, pinfo->frags + 1,
+                       --pinfo->nr_frags * sizeof(pinfo->frags[0]));
+       }
+}
+
 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
        struct sk_buff **pp = NULL;
@@ -3879,14 +3888,14 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
        struct list_head *head = &offload_base;
        int same_flow;
        enum gro_result ret;
+       int grow;
 
-       if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
+       if (!(skb->dev->features & NETIF_F_GRO))
                goto normal;
 
        if (skb_is_gso(skb) || skb_has_frag_list(skb))
                goto normal;
 
-       skb_gro_reset_offset(skb);
        gro_list_prepare(napi, skb);
        NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */
 
@@ -3950,27 +3959,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
        ret = GRO_HELD;
 
 pull:
-       if (skb_headlen(skb) < skb_gro_offset(skb)) {
-               int grow = skb_gro_offset(skb) - skb_headlen(skb);
-
-               BUG_ON(skb->end - skb->tail < grow);
-
-               memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
-
-               skb->tail += grow;
-               skb->data_len -= grow;
-
-               skb_shinfo(skb)->frags[0].page_offset += grow;
-               skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
-
-               if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
-                       skb_frag_unref(skb, 0);
-                       memmove(skb_shinfo(skb)->frags,
-                               skb_shinfo(skb)->frags + 1,
-                               --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
-               }
-       }
-
+       grow = skb_gro_offset(skb) - skb_headlen(skb);
+       if (grow > 0)
+               gro_pull_from_frag0(skb, grow);
 ok:
        return ret;
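
grow can only be positive on the frag0 fast path: that is the only case where GRO's parse point (skb_gro_offset()) advances beyond the linear area (skb_headlen()), because headers are read straight out of the first page fragment. For context, skb_gro_reset_offset(), earlier in this file, arms frag0 only for skbs whose linear area is exhausted and whose first fragment sits in lowmem; its body in this era is assumed to read roughly:

static void skb_gro_reset_offset(struct sk_buff *skb)
{
        const struct skb_shared_info *pinfo = skb_shinfo(skb);
        const skb_frag_t *frag0 = &pinfo->frags[0];

        NAPI_GRO_CB(skb)->data_offset = 0;
        NAPI_GRO_CB(skb)->frag0 = NULL;
        NAPI_GRO_CB(skb)->frag0_len = 0;

        /* frag0 fast path: all remaining data lives in frags[0] */
        if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
            pinfo->nr_frags &&
            !PageHighMem(skb_frag_page(frag0))) {
                NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
                NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
        }
}

For a frame whose IPv4+TCP headers (40 bytes, no options) live entirely in frags[0], grow == 40 and those bytes are copied into the linear area before the skb leaves GRO.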
 
@@ -4038,6 +4029,8 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
        trace_napi_gro_receive_entry(skb);
 
+       skb_gro_reset_offset(skb);
+
        return napi_skb_finish(dev_gro_receive(napi, skb), skb);
 }
 EXPORT_SYMBOL(napi_gro_receive);
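
skb_gro_reset_offset() moves out of dev_gro_receive() and up to its callers, because the napi_gro_frags() path below must reset the GRO offsets itself before it peeks at the Ethernet header in frag0. Driver usage is unchanged; a typical caller might look like this (hypothetical handler, assuming the skb was already fully assembled by ring-specific code):

#include <linux/etherdevice.h>
#include <linux/netdevice.h>

/* Hypothetical RX completion handler. */
static void my_receive(struct napi_struct *napi, struct net_device *dev,
                       struct sk_buff *skb)
{
        skb_record_rx_queue(skb, 0);
        skb->protocol = eth_type_trans(skb, dev);
        napi_gro_receive(napi, skb);    /* resets GRO offsets itself now */
}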
@@ -4066,12 +4059,16 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_get_frags);
 
-static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
-                              gro_result_t ret)
+static gro_result_t napi_frags_finish(struct napi_struct *napi,
+                                     struct sk_buff *skb,
+                                     gro_result_t ret)
 {
        switch (ret) {
        case GRO_NORMAL:
-               if (netif_receive_skb_internal(skb))
+       case GRO_HELD:
+               __skb_push(skb, ETH_HLEN);
+               skb->protocol = eth_type_trans(skb, skb->dev);
+               if (ret == GRO_NORMAL && netif_receive_skb_internal(skb))
                        ret = GRO_DROP;
                break;
 
@@ -4080,7 +4077,6 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
                napi_reuse_skb(napi, skb);
                break;
 
-       case GRO_HELD:
        case GRO_MERGED:
                break;
        }
@@ -4088,17 +4084,41 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
        return ret;
 }
 
+/* Upper GRO stack assumes network header starts at gro_offset=0
+ * Drivers could call both napi_gro_frags() and napi_gro_receive()
+ * We copy ethernet header into skb->data to have a common layout.
+ */
 static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
 {
        struct sk_buff *skb = napi->skb;
+       const struct ethhdr *eth;
+       unsigned int hlen = sizeof(*eth);
 
        napi->skb = NULL;
 
-       if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) {
-               napi_reuse_skb(napi, skb);
-               return NULL;
+       skb_reset_mac_header(skb);
+       skb_gro_reset_offset(skb);
+
+       eth = skb_gro_header_fast(skb, 0);
+       if (unlikely(skb_gro_header_hard(skb, hlen))) {
+               eth = skb_gro_header_slow(skb, hlen, 0);
+               if (unlikely(!eth)) {
+                       napi_reuse_skb(napi, skb);
+                       return NULL;
+               }
+       } else {
+               gro_pull_from_frag0(skb, hlen);
+               NAPI_GRO_CB(skb)->frag0 += hlen;
+               NAPI_GRO_CB(skb)->frag0_len -= hlen;
        }
-       skb->protocol = eth_type_trans(skb, skb->dev);
+       __skb_pull(skb, hlen);
+
+       /*
+        * This works because the only protocols we care about don't require
+        * special handling.
+        * We'll fix it up properly in napi_frags_finish()
+        */
+       skb->protocol = eth->h_proto;
 
        return skb;
 }
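
napi_frags_skb() now does the work eth_type_trans() used to do for the frags path: it locates the Ethernet header (fast path directly in frag0, slow path through skb_gro_header_slow()), pulls it, and stashes eth->h_proto; napi_frags_finish() then pushes the header back and runs eth_type_trans() properly for GRO_NORMAL and GRO_HELD frames. The driver-side contract stays the same, along these lines (hypothetical page-based RX ring):

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Hypothetical page-fragment receive: hand data to GRO without ever
 * building a linear header area ourselves. */
static void my_rx_page(struct napi_struct *napi, struct page *page,
                       unsigned int len)
{
        struct sk_buff *skb = napi_get_frags(napi);

        if (unlikely(!skb)) {
                put_page(page);
                return;
        }
        skb_fill_page_desc(skb, 0, page, 0, len);
        skb->len += len;
        skb->data_len += len;
        skb->truesize += PAGE_SIZE;

        /* napi_frags_skb() extracts the MAC header from frag0 */
        napi_gro_frags(napi);
}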
@@ -4135,8 +4155,8 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
                        struct softnet_data *next = remsd->rps_ipi_next;
 
                        if (cpu_online(remsd->cpu))
-                               __smp_call_function_single(remsd->cpu,
-                                                          &remsd->csd, 0);
+                               smp_call_function_single_async(remsd->cpu,
+                                                          &remsd->csd);
                        remsd = next;
                }
        } else
@@ -5676,6 +5696,13 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
                }
        }
 
+#ifdef CONFIG_NET_RX_BUSY_POLL
+       if (dev->netdev_ops->ndo_busy_poll)
+               features |= NETIF_F_BUSY_POLL;
+       else
+#endif
+               features &= ~NETIF_F_BUSY_POLL;
+
        return features;
 }
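
netdev_fix_features() forces NETIF_F_BUSY_POLL to mirror the presence of ndo_busy_poll, so the bit stays accurate in ethtool -k without every driver managing it by hand. A driver only has to supply the hook; a hedged sketch (all my_* names are illustrative):

#include <linux/netdevice.h>

#ifdef CONFIG_NET_RX_BUSY_POLL
/* Called from a busy-polling socket's context: service the RX ring
 * directly instead of waiting for the interrupt/NAPI path. */
static int my_busy_poll(struct napi_struct *napi)
{
        return my_clean_rx_ring(napi, 4 /* small budget */);
}
#endif

static const struct net_device_ops my_netdev_ops = {
        .ndo_open       = my_open,
        .ndo_stop       = my_stop,
        .ndo_start_xmit = my_start_xmit,
#ifdef CONFIG_NET_RX_BUSY_POLL
        .ndo_busy_poll  = my_busy_poll, /* turns NETIF_F_BUSY_POLL on */
#endif
};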
 
@@ -6251,6 +6278,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
                netdev_stats_to_stats64(storage, &dev->stats);
        }
        storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
+       storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
        return storage;
 }
 EXPORT_SYMBOL(dev_get_stats);
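
Paired with the atomic_long_inc(&dev->tx_dropped) on the ENETDOWN path of __dev_queue_xmit() above, this makes core-level TX drops visible through the ordinary stats interface, next to the existing rx_dropped. A minimal in-kernel reader (illustrative):

#include <linux/netdevice.h>

/* Illustrative only: drops counted by the core now appear alongside
 * driver-reported stats. */
static void my_dump_drops(struct net_device *dev)
{
        struct rtnl_link_stats64 stats;

        dev_get_stats(dev, &stats);
        pr_info("%s: rx_dropped=%llu tx_dropped=%llu\n", dev->name,
                (unsigned long long)stats.rx_dropped,
                (unsigned long long)stats.tx_dropped);
}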