cxgb4: enable inner header checksum calculation
author: Ganesh Goudar <ganeshgr@chelsio.com>
Fri, 11 May 2018 13:05:33 +0000 (18:35 +0530)
committer: David S. Miller <davem@davemloft.net>
Fri, 11 May 2018 20:11:50 +0000 (16:11 -0400)
Set the cntrl bits to indicate whether the inner header checksum
needs to be calculated whenever the packet is an encapsulated
packet, and enable the supported encapsulation features.

Fixes: d0a1299c6bf7 ("cxgb4: add support for vxlan segmentation offload")
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/sge.c
drivers/net/ethernet/chelsio/cxgb4/t4_msg.h

index 03767c0..1e31b9d 100644 (file)
@@ -5626,8 +5626,15 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                        NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
                        NETIF_F_HW_TC;
 
-               if (CHELSIO_CHIP_VERSION(chip) > CHELSIO_T5)
+               if (CHELSIO_CHIP_VERSION(chip) > CHELSIO_T5) {
+                       netdev->hw_enc_features |= NETIF_F_IP_CSUM |
+                                                  NETIF_F_IPV6_CSUM |
+                                                  NETIF_F_RXCSUM |
+                                                  NETIF_F_GSO_UDP_TUNNEL |
+                                                  NETIF_F_TSO | NETIF_F_TSO6;
+
                        netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
+               }
 
                if (highdma)
                        netdev->hw_features |= NETIF_F_HIGHDMA;
index 1a28df1..0f87e97 100644 (file)
@@ -1072,12 +1072,27 @@ static void *inline_tx_skb_header(const struct sk_buff *skb,
 static u64 hwcsum(enum chip_type chip, const struct sk_buff *skb)
 {
        int csum_type;
-       const struct iphdr *iph = ip_hdr(skb);
+       bool inner_hdr_csum = false;
+       u16 proto, ver;
 
-       if (iph->version == 4) {
-               if (iph->protocol == IPPROTO_TCP)
+       if (skb->encapsulation &&
+           (CHELSIO_CHIP_VERSION(chip) > CHELSIO_T5))
+               inner_hdr_csum = true;
+
+       if (inner_hdr_csum) {
+               ver = inner_ip_hdr(skb)->version;
+               proto = (ver == 4) ? inner_ip_hdr(skb)->protocol :
+                       inner_ipv6_hdr(skb)->nexthdr;
+       } else {
+               ver = ip_hdr(skb)->version;
+               proto = (ver == 4) ? ip_hdr(skb)->protocol :
+                       ipv6_hdr(skb)->nexthdr;
+       }
+
+       if (ver == 4) {
+               if (proto == IPPROTO_TCP)
                        csum_type = TX_CSUM_TCPIP;
-               else if (iph->protocol == IPPROTO_UDP)
+               else if (proto == IPPROTO_UDP)
                        csum_type = TX_CSUM_UDPIP;
                else {
 nocsum:                        /*
@@ -1090,19 +1105,29 @@ nocsum:                 /*
                /*
                 * this doesn't work with extension headers
                 */
-               const struct ipv6hdr *ip6h = (const struct ipv6hdr *)iph;
-
-               if (ip6h->nexthdr == IPPROTO_TCP)
+               if (proto == IPPROTO_TCP)
                        csum_type = TX_CSUM_TCPIP6;
-               else if (ip6h->nexthdr == IPPROTO_UDP)
+               else if (proto == IPPROTO_UDP)
                        csum_type = TX_CSUM_UDPIP6;
                else
                        goto nocsum;
        }
 
        if (likely(csum_type >= TX_CSUM_TCPIP)) {
-               u64 hdr_len = TXPKT_IPHDR_LEN_V(skb_network_header_len(skb));
-               int eth_hdr_len = skb_network_offset(skb) - ETH_HLEN;
+               int eth_hdr_len, l4_len;
+               u64 hdr_len;
+
+               if (inner_hdr_csum) {
+                       /* This allows checksum offload for all encapsulated
+                        * packets like GRE etc..
+                        */
+                       l4_len = skb_inner_network_header_len(skb);
+                       eth_hdr_len = skb_inner_network_offset(skb) - ETH_HLEN;
+               } else {
+                       l4_len = skb_network_header_len(skb);
+                       eth_hdr_len = skb_network_offset(skb) - ETH_HLEN;
+               }
+               hdr_len = TXPKT_IPHDR_LEN_V(l4_len);
 
                if (CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5)
                        hdr_len |= TXPKT_ETHHDR_LEN_V(eth_hdr_len);
@@ -1273,7 +1298,7 @@ static inline void t6_fill_tnl_lso(struct sk_buff *skb,
 netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        u32 wr_mid, ctrl0, op;
-       u64 cntrl, *end;
+       u64 cntrl, *end, *sgl;
        int qidx, credits;
        unsigned int flits, ndesc;
        struct adapter *adap;
@@ -1443,6 +1468,19 @@ out_free:        dev_kfree_skb_any(skb);
                                 TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) |
                                 TXPKT_IPHDR_LEN_V(l3hdr_len);
                }
+               sgl = (u64 *)(cpl + 1); /* sgl start here */
+               if (unlikely((u8 *)sgl >= (u8 *)q->q.stat)) {
+                       /* If current position is already at the end of the
+                        * txq, reset the current to point to start of the queue
+                        * and update the end ptr as well.
+                        */
+                       if (sgl == (u64 *)q->q.stat) {
+                               int left = (u8 *)end - (u8 *)q->q.stat;
+
+                               end = (void *)q->q.desc + left;
+                               sgl = (void *)q->q.desc;
+                       }
+               }
                q->tso++;
                q->tx_cso += ssi->gso_segs;
        } else {
@@ -1454,6 +1492,7 @@ out_free: dev_kfree_skb_any(skb);
                wr->op_immdlen = htonl(FW_WR_OP_V(op) |
                                       FW_WR_IMMDLEN_V(len));
                cpl = (void *)(wr + 1);
+               sgl = (u64 *)(cpl + 1);
                if (skb->ip_summed == CHECKSUM_PARTIAL) {
                        cntrl = hwcsum(adap->params.chip, skb) |
                                TXPKT_IPCSUM_DIS_F;
@@ -1487,13 +1526,12 @@ out_free:       dev_kfree_skb_any(skb);
        cpl->ctrl1 = cpu_to_be64(cntrl);
 
        if (immediate) {
-               cxgb4_inline_tx_skb(skb, &q->q, cpl + 1);
+               cxgb4_inline_tx_skb(skb, &q->q, sgl);
                dev_consume_skb_any(skb);
        } else {
                int last_desc;
 
-               cxgb4_write_sgl(skb, &q->q, (struct ulptx_sgl *)(cpl + 1),
-                               end, 0, addr);
+               cxgb4_write_sgl(skb, &q->q, (void *)sgl, end, 0, addr);
                skb_orphan(skb);
 
                last_desc = q->q.pidx + ndesc - 1;
@@ -2259,7 +2297,7 @@ static void cxgb4_sgetim_to_hwtstamp(struct adapter *adap,
 }
 
 static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
-                  const struct cpl_rx_pkt *pkt)
+                  const struct cpl_rx_pkt *pkt, unsigned long tnl_hdr_len)
 {
        struct adapter *adapter = rxq->rspq.adap;
        struct sge *s = &adapter->sge;
@@ -2275,6 +2313,8 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
        }
 
        copy_frags(skb, gl, s->pktshift);
+       if (tnl_hdr_len)
+               skb->csum_level = 1;
        skb->len = gl->tot_len - s->pktshift;
        skb->data_len = skb->len;
        skb->truesize += skb->data_len;
@@ -2406,7 +2446,7 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
        struct sge *s = &q->adap->sge;
        int cpl_trace_pkt = is_t4(q->adap->params.chip) ?
                            CPL_TRACE_PKT : CPL_TRACE_PKT_T5;
-       u16 err_vec;
+       u16 err_vec, tnl_hdr_len = 0;
        struct port_info *pi;
        int ret = 0;
 
@@ -2415,16 +2455,19 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
 
        pkt = (const struct cpl_rx_pkt *)rsp;
        /* Compressed error vector is enabled for T6 only */
-       if (q->adap->params.tp.rx_pkt_encap)
+       if (q->adap->params.tp.rx_pkt_encap) {
                err_vec = T6_COMPR_RXERR_VEC_G(be16_to_cpu(pkt->err_vec));
-       else
+               tnl_hdr_len = T6_RX_TNLHDR_LEN_G(ntohs(pkt->err_vec));
+       } else {
                err_vec = be16_to_cpu(pkt->err_vec);
+       }
 
        csum_ok = pkt->csum_calc && !err_vec &&
                  (q->netdev->features & NETIF_F_RXCSUM);
-       if ((pkt->l2info & htonl(RXF_TCP_F)) &&
+       if (((pkt->l2info & htonl(RXF_TCP_F)) ||
+            tnl_hdr_len) &&
            (q->netdev->features & NETIF_F_GRO) && csum_ok && !pkt->ip_frag) {
-               do_gro(rxq, si, pkt);
+               do_gro(rxq, si, pkt, tnl_hdr_len);
                return 0;
        }
 
@@ -2471,7 +2514,13 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
                } else if (pkt->l2info & htonl(RXF_IP_F)) {
                        __sum16 c = (__force __sum16)pkt->csum;
                        skb->csum = csum_unfold(c);
-                       skb->ip_summed = CHECKSUM_COMPLETE;
+
+                       if (tnl_hdr_len) {
+                               skb->ip_summed = CHECKSUM_UNNECESSARY;
+                               skb->csum_level = 1;
+                       } else {
+                               skb->ip_summed = CHECKSUM_COMPLETE;
+                       }
                        rxq->stats.rx_cso++;
                }
        } else {
index fe2029e..09e38f0 100644 (file)
@@ -1233,6 +1233,11 @@ struct cpl_rx_pkt {
 #define T6_COMPR_RXERR_SUM_V(x) ((x) << T6_COMPR_RXERR_SUM_S)
 #define T6_COMPR_RXERR_SUM_F    T6_COMPR_RXERR_SUM_V(1U)
 
+#define T6_RX_TNLHDR_LEN_S    8
+#define T6_RX_TNLHDR_LEN_M    0xFF
+#define T6_RX_TNLHDR_LEN_V(x) ((x) << T6_RX_TNLHDR_LEN_S)
+#define T6_RX_TNLHDR_LEN_G(x) (((x) >> T6_RX_TNLHDR_LEN_S) & T6_RX_TNLHDR_LEN_M)
+
 struct cpl_trace_pkt {
        u8 opcode;
        u8 intf;