Merge tag 'net-5.10-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
author     Linus Torvalds <torvalds@linux-foundation.org>
           Thu, 12 Nov 2020 22:02:04 +0000 (14:02 -0800)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Thu, 12 Nov 2020 22:02:04 +0000 (14:02 -0800)
Pull networking fixes from Jakub Kicinski:
 "Current release - regressions:

   - arm64: dts: fsl-ls1028a-kontron-sl28: specify in-band mode for
     ENETC

  Current release - bugs in new features:

   - mptcp: provide rmem[0] limit offset to fix oops

  Previous release - regressions:

   - IPv6: Set SIT tunnel hard_header_len to zero to fix path MTU
     calculations

   - lan743x: correctly handle chips with internal PHY

   - bpf: Don't rely on GCC __attribute__((optimize)) to disable GCSE

   - mlx5e: Fix VXLAN port table synchronization after function reload

  Previous release - always broken:

   - bpf: Zero-fill re-used per-cpu map element

   - fix out-of-order UDP packets when forwarding with UDP GSO fraglists
     turned on:
       - fix UDP header access on Fast/frag0 UDP GRO
       - fix IP header access and skb lookup on Fast/frag0 UDP GRO

   - ethtool: netlink: add missing netdev_features_change() call

   - net: Update window_clamp if SOCK_RCVBUF is set

   - igc: Fix returning wrong statistics

   - ch_ktls: fix multiple leaks and corner cases in Chelsio TLS offload

   - tunnels: Fix off-by-one in lower MTU bounds for ICMP/ICMPv6 replies

   - r8169: disable hw csum for short packets on all chip versions

   - vrf: Fix fast path output packet handling with async Netfilter
     rules"

* tag 'net-5.10-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (65 commits)
  lan743x: fix use of uninitialized variable
  net: udp: fix IP header access and skb lookup on Fast/frag0 UDP GRO
  net: udp: fix UDP header access on Fast/frag0 UDP GRO
  devlink: Avoid overwriting port attributes of registered port
  vrf: Fix fast path output packet handling with async Netfilter rules
  cosa: Add missing kfree in error path of cosa_write
  net: switch to the kernel.org patchwork instance
  ch_ktls: stop the txq if reaches threshold
  ch_ktls: tcb update fails sometimes
  ch_ktls/cxgb4: handle partial tag alone SKBs
  ch_ktls: don't free skb before sending FIN
  ch_ktls: packet handling prior to start marker
  ch_ktls: Correction in middle record handling
  ch_ktls: missing handling of header alone
  ch_ktls: Correction in trimmed_len calculation
  cxgb4/ch_ktls: creating skbs causes panic
  ch_ktls: Update checksum information
  ch_ktls: Correction in finding correct length
  cxgb4/ch_ktls: decrypted bit is not enough
  net/x25: Fix null-ptr-deref in x25_connect
  ...

76 files changed:
Documentation/networking/netdev-FAQ.rst
Documentation/networking/phy.rst
Documentation/process/stable-kernel-rules.rst
Documentation/translations/it_IT/process/stable-kernel-rules.rst
MAINTAINERS
arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts
drivers/net/dsa/mv88e6xxx/devlink.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
drivers/net/ethernet/chelsio/cxgb4/sge.c
drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
drivers/net/ethernet/intel/i40e/i40e_xsk.c
drivers/net/ethernet/intel/igc/igc_main.c
drivers/net/ethernet/marvell/prestera/Kconfig
drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h
drivers/net/ethernet/microchip/lan743x_main.c
drivers/net/ethernet/microchip/lan743x_main.h
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/phy/realtek.c
drivers/net/vrf.c
drivers/net/wan/cosa.c
include/linux/compiler-gcc.h
include/linux/compiler_types.h
include/linux/filter.h
include/net/xsk_buff_pool.h
kernel/bpf/Makefile
kernel/bpf/bpf_lsm.c
kernel/bpf/core.c
kernel/bpf/hashtab.c
kernel/bpf/preload/Kconfig
net/core/devlink.c
net/ethtool/features.c
net/ipv4/ip_tunnel_core.c
net/ipv4/syncookies.c
net/ipv4/udp_offload.c
net/ipv6/sit.c
net/ipv6/syncookies.c
net/ipv6/udp_offload.c
net/iucv/af_iucv.c
net/mptcp/protocol.c
net/netlabel/netlabel_unlabeled.c
net/tipc/topsrv.c
net/x25/af_x25.c
net/xdp/xsk.c
net/xdp/xsk_buff_pool.c
samples/bpf/task_fd_query_user.c
samples/bpf/tracex2_user.c
samples/bpf/tracex3_user.c
samples/bpf/xdp_redirect_cpu_user.c
samples/bpf/xdp_rxq_info_user.c
scripts/bpf_helpers_doc.py
tools/bpf/bpftool/feature.c
tools/bpf/bpftool/prog.c
tools/bpf/bpftool/skeleton/profiler.bpf.c
tools/lib/bpf/hashmap.h
tools/lib/bpf/xsk.c
tools/testing/selftests/bpf/prog_tests/map_init.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/profiler.inc.h
tools/testing/selftests/bpf/progs/test_map_init.c [new file with mode: 0644]
tools/testing/selftests/tc-testing/tc-tests/filters/tests.json

index d5c9320..2153776 100644 (file)
@@ -110,7 +110,7 @@ Q: I sent a patch and I'm wondering what happened to it?
 Q: How can I tell whether it got merged?
 A: Start by looking at the main patchworks queue for netdev:
 
-  http://patchwork.ozlabs.org/project/netdev/list/
+  https://patchwork.kernel.org/project/netdevbpf/list/
 
 The "State" field will tell you exactly where things are at with your
 patch.
@@ -152,7 +152,7 @@ networking subsystem, and then hands them off to Greg.
 
 There is a patchworks queue that you can see here:
 
-  http://patchwork.ozlabs.org/bundle/davem/stable/?state=*
+  https://patchwork.kernel.org/bundle/netdev/stable/?state=*
 
 It contains the patches which Dave has selected, but not yet handed off
 to Greg.  If Greg already has the patch, then it will be here:
index 2561060..b2f7ec7 100644 (file)
@@ -247,8 +247,8 @@ Some of the interface modes are described below:
     speeds (see below.)
 
 ``PHY_INTERFACE_MODE_2500BASEX``
-    This defines a variant of 1000BASE-X which is clocked 2.5 times faster,
-    than the 802.3 standard giving a fixed bit rate of 3.125Gbaud.
+    This defines a variant of 1000BASE-X which is clocked 2.5 times as fast
+    as the 802.3 standard, giving a fixed bit rate of 3.125Gbaud.
 
 ``PHY_INTERFACE_MODE_SGMII``
     This is used for Cisco SGMII, which is a modification of 1000BASE-X
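
(Arithmetic behind the corrected sentence: 1000BASE-X carries 1 Gb/s through 8b/10b line coding, i.e. 1.25 Gbaud on the wire, and 1.25 Gbaud × 2.5 = 3.125 Gbaud.)
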
index 06f743b..3973556 100644 (file)
@@ -39,7 +39,7 @@ Procedure for submitting patches to the -stable tree
    submission guidelines as described in
    :ref:`Documentation/networking/netdev-FAQ.rst <netdev-FAQ>`
    after first checking the stable networking queue at
-   https://patchwork.ozlabs.org/bundle/davem/stable/?series=&submitter=&state=*&q=&archive=
+   https://patchwork.kernel.org/bundle/netdev/stable/?state=*
    to ensure the requested patch is not already queued up.
  - Security patches should not be handled (solely) by the -stable review
    process but should follow the procedures in
index 4f206ce..283d625 100644 (file)
@@ -46,7 +46,7 @@ Procedura per sottomettere patch per i sorgenti -stable
    :ref:`Documentation/translations/it_IT/networking/netdev-FAQ.rst <it_netdev-FAQ>`;
    ma solo dopo aver verificato al seguente indirizzo che la patch non sia
    già in coda:
-   https://patchwork.ozlabs.org/bundle/davem/stable/?series=&submitter=&state=*&q=&archive=
+   https://patchwork.kernel.org/bundle/netdev/stable/?state=*
  - Una patch di sicurezza non dovrebbero essere gestite (solamente) dal processo
    di revisione -stable, ma dovrebbe seguire le procedure descritte in
    :ref:`Documentation/translations/it_IT/admin-guide/security-bugs.rst <it_securitybugs>`.
index 94ac10a..4a34b25 100644 (file)
@@ -1279,7 +1279,7 @@ M:        Igor Russkikh <irusskikh@marvell.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 W:     https://www.marvell.com/
-Q:     http://patchwork.ozlabs.org/project/netdev/list/
+Q:     https://patchwork.kernel.org/project/netdevbpf/list/
 F:     Documentation/networking/device_drivers/ethernet/aquantia/atlantic.rst
 F:     drivers/net/ethernet/aquantia/atlantic/
 
@@ -8830,8 +8830,8 @@ S:        Supported
 W:     http://www.intel.com/support/feedback.htm
 W:     http://e1000.sourceforge.net/
 Q:     http://patchwork.ozlabs.org/project/intel-wired-lan/list/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-queue.git
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue.git
 F:     Documentation/networking/device_drivers/ethernet/intel/
 F:     drivers/net/ethernet/intel/
 F:     drivers/net/ethernet/intel/*/
@@ -11174,7 +11174,7 @@ M:      Tariq Toukan <tariqt@nvidia.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 W:     http://www.mellanox.com
-Q:     http://patchwork.ozlabs.org/project/netdev/list/
+Q:     https://patchwork.kernel.org/project/netdevbpf/list/
 F:     drivers/net/ethernet/mellanox/mlx4/en_*
 
 MELLANOX ETHERNET DRIVER (mlx5e)
@@ -11182,7 +11182,7 @@ M:      Saeed Mahameed <saeedm@nvidia.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 W:     http://www.mellanox.com
-Q:     http://patchwork.ozlabs.org/project/netdev/list/
+Q:     https://patchwork.kernel.org/project/netdevbpf/list/
 F:     drivers/net/ethernet/mellanox/mlx5/core/en_*
 
 MELLANOX ETHERNET INNOVA DRIVERS
@@ -11190,7 +11190,7 @@ R:      Boris Pismenny <borisp@nvidia.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 W:     http://www.mellanox.com
-Q:     http://patchwork.ozlabs.org/project/netdev/list/
+Q:     https://patchwork.kernel.org/project/netdevbpf/list/
 F:     drivers/net/ethernet/mellanox/mlx5/core/accel/*
 F:     drivers/net/ethernet/mellanox/mlx5/core/en_accel/*
 F:     drivers/net/ethernet/mellanox/mlx5/core/fpga/*
@@ -11202,7 +11202,7 @@ M:      Ido Schimmel <idosch@nvidia.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 W:     http://www.mellanox.com
-Q:     http://patchwork.ozlabs.org/project/netdev/list/
+Q:     https://patchwork.kernel.org/project/netdevbpf/list/
 F:     drivers/net/ethernet/mellanox/mlxsw/
 F:     tools/testing/selftests/drivers/net/mlxsw/
 
@@ -11211,7 +11211,7 @@ M:      mlxsw@nvidia.com
 L:     netdev@vger.kernel.org
 S:     Supported
 W:     http://www.mellanox.com
-Q:     http://patchwork.ozlabs.org/project/netdev/list/
+Q:     https://patchwork.kernel.org/project/netdevbpf/list/
 F:     drivers/net/ethernet/mellanox/mlxfw/
 
 MELLANOX HARDWARE PLATFORM SUPPORT
@@ -11230,7 +11230,7 @@ L:      netdev@vger.kernel.org
 L:     linux-rdma@vger.kernel.org
 S:     Supported
 W:     http://www.mellanox.com
-Q:     http://patchwork.ozlabs.org/project/netdev/list/
+Q:     https://patchwork.kernel.org/project/netdevbpf/list/
 F:     drivers/net/ethernet/mellanox/mlx4/
 F:     include/linux/mlx4/
 
@@ -11251,7 +11251,7 @@ L:      netdev@vger.kernel.org
 L:     linux-rdma@vger.kernel.org
 S:     Supported
 W:     http://www.mellanox.com
-Q:     http://patchwork.ozlabs.org/project/netdev/list/
+Q:     https://patchwork.kernel.org/project/netdevbpf/list/
 F:     Documentation/networking/device_drivers/ethernet/mellanox/
 F:     drivers/net/ethernet/mellanox/mlx5/core/
 F:     include/linux/mlx5/
@@ -12131,7 +12131,7 @@ M:      Jakub Kicinski <kuba@kernel.org>
 L:     netdev@vger.kernel.org
 S:     Maintained
 W:     http://www.linuxfoundation.org/en/Net
-Q:     http://patchwork.ozlabs.org/project/netdev/list/
+Q:     https://patchwork.kernel.org/project/netdevbpf/list/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
 F:     Documentation/devicetree/bindings/net/
@@ -12176,7 +12176,7 @@ M:      Jakub Kicinski <kuba@kernel.org>
 L:     netdev@vger.kernel.org
 S:     Maintained
 W:     http://www.linuxfoundation.org/en/Net
-Q:     http://patchwork.ozlabs.org/project/netdev/list/
+Q:     https://patchwork.kernel.org/project/netdevbpf/list/
 B:     mailto:netdev@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
@@ -15247,7 +15247,6 @@ F:      drivers/iommu/s390-iommu.c
 S390 IUCV NETWORK LAYER
 M:     Julian Wiedmann <jwi@linux.ibm.com>
 M:     Karsten Graul <kgraul@linux.ibm.com>
-M:     Ursula Braun <ubraun@linux.ibm.com>
 L:     linux-s390@vger.kernel.org
 S:     Supported
 W:     http://www.ibm.com/developerworks/linux/linux390/
@@ -15258,7 +15257,6 @@ F:      net/iucv/
 S390 NETWORK DRIVERS
 M:     Julian Wiedmann <jwi@linux.ibm.com>
 M:     Karsten Graul <kgraul@linux.ibm.com>
-M:     Ursula Braun <ubraun@linux.ibm.com>
 L:     linux-s390@vger.kernel.org
 S:     Supported
 W:     http://www.ibm.com/developerworks/linux/linux390/
@@ -15829,7 +15827,6 @@ S:      Maintained
 F:     drivers/misc/sgi-xp/
 
 SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS
-M:     Ursula Braun <ubraun@linux.ibm.com>
 M:     Karsten Graul <kgraul@linux.ibm.com>
 L:     linux-s390@vger.kernel.org
 S:     Supported
index f46eb47..8161dd2 100644 (file)
@@ -75,6 +75,7 @@
 &enetc_port0 {
        phy-handle = <&phy0>;
        phy-connection-type = "sgmii";
+       managed = "in-band-status";
        status = "okay";
 
        mdio {
index 10cd1bf..ade04c0 100644 (file)
@@ -393,8 +393,10 @@ static int mv88e6xxx_region_atu_snapshot(struct devlink *dl,
        mv88e6xxx_reg_lock(chip);
 
        err = mv88e6xxx_fid_map(chip, fid_bitmap);
-       if (err)
+       if (err) {
+               kfree(table);
                goto out;
+       }
 
        while (1) {
                fid = find_next_bit(fid_bitmap, MV88E6XXX_N_FID, fid + 1);
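
A minimal userspace C analogue of the fix above (helper name and buffer size are hypothetical): every early error exit taken after an allocation must release that allocation, which is exactly what the added kfree(table) restores on the mv88e6xxx_fid_map() failure path.

    #include <stdlib.h>

    static int build_map(void)          /* stand-in for mv88e6xxx_fid_map() */
    {
            return -1;                  /* pretend the mapping step failed */
    }

    static int snapshot(void)
    {
            char *table = malloc(4096);

            if (!table)
                    return -1;

            if (build_map() != 0) {
                    free(table);        /* the fix: don't leak on this path */
                    return -1;
            }

            /* ... fill and use table, then release it ... */
            free(table);
            return 0;
    }

    int main(void)
    {
            return snapshot() ? 1 : 0;
    }
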
index 3352dad..2730860 100644 (file)
@@ -2124,6 +2124,9 @@ void cxgb4_inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q,
 void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q,
                     struct ulptx_sgl *sgl, u64 *end, unsigned int start,
                     const dma_addr_t *addr);
+void cxgb4_write_partial_sgl(const struct sk_buff *skb, struct sge_txq *q,
+                            struct ulptx_sgl *sgl, u64 *end,
+                            const dma_addr_t *addr, u32 start, u32 send_len);
 void cxgb4_ring_tx_db(struct adapter *adap, struct sge_txq *q, int n);
 int t4_set_vlan_acl(struct adapter *adap, unsigned int mbox, unsigned int vf,
                    u16 vlan);
index 0273f40..17410fe 100644 (file)
@@ -3573,6 +3573,8 @@ static int chcr_stats_show(struct seq_file *seq, void *v)
                   atomic64_read(&adap->ch_ktls_stats.ktls_tx_complete_pkts));
        seq_printf(seq, "TX trim pkts :                    %20llu\n",
                   atomic64_read(&adap->ch_ktls_stats.ktls_tx_trimmed_pkts));
+       seq_printf(seq, "TX sw fallback :                  %20llu\n",
+                  atomic64_read(&adap->ch_ktls_stats.ktls_tx_fallback));
        while (i < MAX_NPORTS) {
                ktls_port = &adap->ch_ktls_stats.ktls_port[i];
                seq_printf(seq, "Port %d\n", i);
index a952fe1..7fd264a 100644 (file)
@@ -1176,6 +1176,7 @@ static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
                txq = netdev_pick_tx(dev, skb, sb_dev);
                if (xfrm_offload(skb) || is_ptp_enabled(skb, dev) ||
                    skb->encapsulation ||
+                   cxgb4_is_ktls_skb(skb) ||
                    (proto != IPPROTO_TCP && proto != IPPROTO_UDP))
                        txq = txq % pi->nqsets;
 
index b169776..1b49f2f 100644 (file)
@@ -388,6 +388,7 @@ struct ch_ktls_stats_debug {
        atomic64_t ktls_tx_retransmit_pkts;
        atomic64_t ktls_tx_complete_pkts;
        atomic64_t ktls_tx_trimmed_pkts;
+       atomic64_t ktls_tx_fallback;
 };
 #endif
 
@@ -493,6 +494,11 @@ struct cxgb4_uld_info {
 #endif
 };
 
+static inline bool cxgb4_is_ktls_skb(struct sk_buff *skb)
+{
+       return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk);
+}
+
 void cxgb4_uld_enable(struct adapter *adap);
 void cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
 int cxgb4_unregister_uld(enum cxgb4_uld type);
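
(The new cxgb4_is_ktls_skb() helper reflects the "cxgb4/ch_ktls: decrypted bit is not enough" commit from this pull: skb->decrypted alone cannot identify a ktls Tx skb, so the skb is instead matched against a TLS device-offloaded socket. Its two callers are the cxgb_select_queue() hunk above and the cxgb4_eth_xmit() hunk below.)
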
index a9e9c7a..196652a 100644 (file)
@@ -890,6 +890,114 @@ void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q,
 }
 EXPORT_SYMBOL(cxgb4_write_sgl);
 
+/*     cxgb4_write_partial_sgl - populate SGL for partial packet
+ *     @skb: the packet
+ *     @q: the Tx queue we are writing into
+ *     @sgl: starting location for writing the SGL
+ *     @end: points right after the end of the SGL
+ *     @addr: the list of bus addresses for the SGL elements
+ *     @start: start offset in the SKB where partial data starts
+ *     @len: length of data from @start to send out
+ *
+ *     This API will handle sending out partial data of a skb if required.
+ *     Unlike cxgb4_write_sgl, @start can be any offset into the skb data,
+ *     and @len will decide how much data after @start offset to send out.
+ */
+void cxgb4_write_partial_sgl(const struct sk_buff *skb, struct sge_txq *q,
+                            struct ulptx_sgl *sgl, u64 *end,
+                            const dma_addr_t *addr, u32 start, u32 len)
+{
+       struct ulptx_sge_pair buf[MAX_SKB_FRAGS / 2 + 1] = {0}, *to;
+       u32 frag_size, skb_linear_data_len = skb_headlen(skb);
+       struct skb_shared_info *si = skb_shinfo(skb);
+       u8 i = 0, frag_idx = 0, nfrags = 0;
+       skb_frag_t *frag;
+
+       /* Fill the first SGL either from linear data or from partial
+        * frag based on @start.
+        */
+       if (unlikely(start < skb_linear_data_len)) {
+               frag_size = min(len, skb_linear_data_len - start);
+               sgl->len0 = htonl(frag_size);
+               sgl->addr0 = cpu_to_be64(addr[0] + start);
+               len -= frag_size;
+               nfrags++;
+       } else {
+               start -= skb_linear_data_len;
+               frag = &si->frags[frag_idx];
+               frag_size = skb_frag_size(frag);
+               /* find the first frag */
+               while (start >= frag_size) {
+                       start -= frag_size;
+                       frag_idx++;
+                       frag = &si->frags[frag_idx];
+                       frag_size = skb_frag_size(frag);
+               }
+
+               frag_size = min(len, skb_frag_size(frag) - start);
+               sgl->len0 = cpu_to_be32(frag_size);
+               sgl->addr0 = cpu_to_be64(addr[frag_idx + 1] + start);
+               len -= frag_size;
+               nfrags++;
+               frag_idx++;
+       }
+
+       /* If the entire partial data fit in one SGL, then send it out
+        * now.
+        */
+       if (!len)
+               goto done;
+
+       /* Most of the complexity below deals with the possibility we hit the
+        * end of the queue in the middle of writing the SGL.  For this case
+        * only we create the SGL in a temporary buffer and then copy it.
+        */
+       to = (u8 *)end > (u8 *)q->stat ? buf : sgl->sge;
+
+       /* If the skb couldn't fit in first SGL completely, fill the
+        * rest of the frags in subsequent SGLs. Note that each SGL
+        * pair can store 2 frags.
+        */
+       while (len) {
+               frag_size = min(len, skb_frag_size(&si->frags[frag_idx]));
+               to->len[i & 1] = cpu_to_be32(frag_size);
+               to->addr[i & 1] = cpu_to_be64(addr[frag_idx + 1]);
+               if (i && (i & 1))
+                       to++;
+               nfrags++;
+               frag_idx++;
+               i++;
+               len -= frag_size;
+       }
+
+       /* If we ended in an odd boundary, then set the second SGL's
+        * length in the pair to 0.
+        */
+       if (i & 1)
+               to->len[1] = cpu_to_be32(0);
+
+       /* Copy from temporary buffer to Tx ring, in case we hit the
+        * end of the queue in the middle of writing the SGL.
+        */
+       if (unlikely((u8 *)end > (u8 *)q->stat)) {
+               u32 part0 = (u8 *)q->stat - (u8 *)sgl->sge, part1;
+
+               if (likely(part0))
+                       memcpy(sgl->sge, buf, part0);
+               part1 = (u8 *)end - (u8 *)q->stat;
+               memcpy(q->desc, (u8 *)buf + part0, part1);
+               end = (void *)q->desc + part1;
+       }
+
+       /* 0-pad to multiple of 16 */
+       if ((uintptr_t)end & 8)
+               *end = 0;
+done:
+       sgl->cmd_nsge = htonl(ULPTX_CMD_V(ULP_TX_SC_DSGL) |
+                       ULPTX_NSGE_V(nfrags));
+}
+EXPORT_SYMBOL(cxgb4_write_partial_sgl);
+
 /* This function copies 64 byte coalesced work request to
  * memory mapped BAR2 space. For coalesced WR SGE fetches
  * data from the FIFO instead of from Host.
@@ -1422,7 +1530,8 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 #endif /* CHELSIO_IPSEC_INLINE */
 
 #if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE)
-       if (skb->decrypted)
+       if (cxgb4_is_ktls_skb(skb) &&
+           (skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb))))
                return adap->uld[CXGB4_ULD_KTLS].tx_handler(skb, dev);
 #endif /* CHELSIO_TLS_DEVICE */
 
index 5195f69..c24485c 100644 (file)
 static LIST_HEAD(uld_ctx_list);
 static DEFINE_MUTEX(dev_mutex);
 
+/* chcr_get_nfrags_to_send: get the remaining nfrags after start offset
+ * @skb: skb
+ * @start: start offset.
+ * @len: how much data to send after @start
+ */
+static int chcr_get_nfrags_to_send(struct sk_buff *skb, u32 start, u32 len)
+{
+       struct skb_shared_info *si = skb_shinfo(skb);
+       u32 frag_size, skb_linear_data_len = skb_headlen(skb);
+       u8 nfrags = 0, frag_idx = 0;
+       skb_frag_t *frag;
+
+       /* if its a linear skb then return 1 */
+       if (!skb_is_nonlinear(skb))
+               return 1;
+
+       if (unlikely(start < skb_linear_data_len)) {
+               frag_size = min(len, skb_linear_data_len - start);
+               start = 0;
+       } else {
+               start -= skb_linear_data_len;
+
+               frag = &si->frags[frag_idx];
+               frag_size = skb_frag_size(frag);
+               while (start >= frag_size) {
+                       start -= frag_size;
+                       frag_idx++;
+                       frag = &si->frags[frag_idx];
+                       frag_size = skb_frag_size(frag);
+               }
+               frag_size = min(len, skb_frag_size(frag) - start);
+       }
+       len -= frag_size;
+       nfrags++;
+
+       while (len) {
+               frag_size = min(len, skb_frag_size(&si->frags[frag_idx]));
+               len -= frag_size;
+               nfrags++;
+               frag_idx++;
+       }
+       return nfrags;
+}
+
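
A self-contained userspace analogue of the counting loop above, for readers without a kernel tree handy (the skb's linear head is folded in as fragment zero here, and the fragment sizes are made up):

    #include <stdio.h>

    /* count how many fragments the byte range [start, start + len) touches */
    static int nfrags_to_send(const unsigned int *frag_size, int nr_frags,
                              unsigned int start, unsigned int len)
    {
            int i = 0, nfrags = 0;

            /* skip the fragments that lie entirely before 'start' */
            while (i < nr_frags && start >= frag_size[i])
                    start -= frag_size[i++];

            /* walk fragments until 'len' bytes are accounted for */
            while (len && i < nr_frags) {
                    unsigned int chunk = frag_size[i] - start;

                    if (chunk > len)
                            chunk = len;
                    len -= chunk;
                    start = 0;
                    nfrags++;
                    i++;
            }
            return nfrags;
    }

    int main(void)
    {
            unsigned int frags[] = { 4096, 4096, 2048 };

            /* bytes [3000, 9000) touch all three fragments -> prints 3 */
            printf("%d\n", nfrags_to_send(frags, 3, 3000, 6000));
            return 0;
    }
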
 static int chcr_init_tcb_fields(struct chcr_ktls_info *tx_info);
 /*
  * chcr_ktls_save_keys: calculate and save crypto keys.
@@ -689,7 +733,8 @@ static int chcr_ktls_cpl_set_tcb_rpl(struct adapter *adap, unsigned char *input)
 }
 
 static void *__chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info,
-                                       u32 tid, void *pos, u16 word, u64 mask,
+                                       u32 tid, void *pos, u16 word,
+                                       struct sge_eth_txq *q, u64 mask,
                                        u64 val, u32 reply)
 {
        struct cpl_set_tcb_field_core *cpl;
@@ -698,7 +743,10 @@ static void *__chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info,
 
        /* ULP_TXPKT */
        txpkt = pos;
-       txpkt->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | ULP_TXPKT_DEST_V(0));
+       txpkt->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) |
+                               ULP_TXPKT_CHANNELID_V(tx_info->port_id) |
+                               ULP_TXPKT_FID_V(q->q.cntxt_id) |
+                               ULP_TXPKT_RO_F);
        txpkt->len = htonl(DIV_ROUND_UP(CHCR_SET_TCB_FIELD_LEN, 16));
 
        /* ULPTX_IDATA sub-command */
@@ -753,7 +801,7 @@ static void *chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info,
                } else {
                        u8 buf[48] = {0};
 
-                       __chcr_write_cpl_set_tcb_ulp(tx_info, tid, buf, word,
+                       __chcr_write_cpl_set_tcb_ulp(tx_info, tid, buf, word, q,
                                                     mask, val, reply);
 
                        return chcr_copy_to_txd(buf, &q->q, pos,
@@ -761,7 +809,7 @@ static void *chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info,
                }
        }
 
-       pos = __chcr_write_cpl_set_tcb_ulp(tx_info, tid, pos, word,
+       pos = __chcr_write_cpl_set_tcb_ulp(tx_info, tid, pos, word, q,
                                           mask, val, reply);
 
        /* check again if we are at the end of the queue */
@@ -783,11 +831,11 @@ static void *chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info,
  */
 static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info,
                                   struct sge_eth_txq *q, u64 tcp_seq,
-                                  u64 tcp_ack, u64 tcp_win)
+                                  u64 tcp_ack, u64 tcp_win, bool offset)
 {
        bool first_wr = ((tx_info->prev_ack == 0) && (tx_info->prev_win == 0));
        struct ch_ktls_port_stats_debug *port_stats;
-       u32 len, cpl = 0, ndesc, wr_len;
+       u32 len, cpl = 0, ndesc, wr_len, wr_mid = 0;
        struct fw_ulptx_wr *wr;
        int credits;
        void *pos;
@@ -803,6 +851,11 @@ static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info,
                return NETDEV_TX_BUSY;
        }
 
+       if (unlikely(credits < ETHTXQ_STOP_THRES)) {
+               chcr_eth_txq_stop(q);
+               wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+       }
+
        pos = &q->q.desc[q->q.pidx];
        /* make space for WR, we'll fill it later when we know all the cpls
         * being sent out and have complete length.
@@ -818,7 +871,7 @@ static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info,
                cpl++;
        }
        /* reset snd una if it's a re-transmit pkt */
-       if (tcp_seq != tx_info->prev_seq) {
+       if (tcp_seq != tx_info->prev_seq || offset) {
                /* reset snd_una */
                port_stats =
                        &tx_info->adap->ch_ktls_stats.ktls_port[tx_info->port_id];
@@ -827,7 +880,8 @@ static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info,
                                                 TCB_SND_UNA_RAW_V
                                                 (TCB_SND_UNA_RAW_M),
                                                 TCB_SND_UNA_RAW_V(0), 0);
-               atomic64_inc(&port_stats->ktls_tx_ooo);
+               if (tcp_seq != tx_info->prev_seq)
+                       atomic64_inc(&port_stats->ktls_tx_ooo);
                cpl++;
        }
        /* update ack */
@@ -856,7 +910,8 @@ static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info,
                wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR));
                wr->cookie = 0;
                /* fill len in wr field */
-               wr->flowid_len16 = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(len, 16)));
+               wr->flowid_len16 = htonl(wr_mid |
+                                        FW_WR_LEN16_V(DIV_ROUND_UP(len, 16)));
 
                ndesc = DIV_ROUND_UP(len, 64);
                chcr_txq_advance(&q->q, ndesc);
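
(A pattern repeated in several hunks of this patch: when free descriptors fall below ETHTXQ_STOP_THRES, the driver stops the Tx queue and sets FW_WR_EQUEQ_F | FW_WR_EQUIQ_F in the work request, asking the firmware for an egress-queue update so the queue can be restarted once credits are reclaimed. This is the "ch_ktls: stop the txq if reaches threshold" fix from the shortlog.)
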
@@ -865,35 +920,15 @@ static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info,
        return 0;
 }
 
-/*
- * chcr_ktls_skb_copy
- * @nskb - new skb where the frags to be added.
- * @skb - old skb from which frags will be copied.
- */
-static void chcr_ktls_skb_copy(struct sk_buff *skb, struct sk_buff *nskb)
-{
-       int i;
-
-       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-               skb_shinfo(nskb)->frags[i] = skb_shinfo(skb)->frags[i];
-               __skb_frag_ref(&skb_shinfo(nskb)->frags[i]);
-       }
-
-       skb_shinfo(nskb)->nr_frags = skb_shinfo(skb)->nr_frags;
-       nskb->len += skb->data_len;
-       nskb->data_len = skb->data_len;
-       nskb->truesize += skb->data_len;
-}
-
 /*
  * chcr_ktls_get_tx_flits
  * returns number of flits to be sent out, it includes key context length, WR
  * size and skb fragments.
  */
 static unsigned int
-chcr_ktls_get_tx_flits(const struct sk_buff *skb, unsigned int key_ctx_len)
+chcr_ktls_get_tx_flits(u32 nr_frags, unsigned int key_ctx_len)
 {
-       return chcr_sgl_len(skb_shinfo(skb)->nr_frags) +
+       return chcr_sgl_len(nr_frags) +
               DIV_ROUND_UP(key_ctx_len + CHCR_KTLS_WR_SIZE, 8);
 }
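
(Unit check for the flits arithmetic used throughout these hunks: a flit is 8 bytes and a Tx descriptor is 64 bytes, hence ndesc = DIV_ROUND_UP(flits, 8), while work-request lengths are expressed in 16-byte units, len16 = DIV_ROUND_UP(flits, 2). For example, 20 flits = 160 bytes -> 3 descriptors, len16 = 10.)
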
 
@@ -957,8 +992,10 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb,
        struct tcphdr *tcp;
        int len16, pktlen;
        struct iphdr *ip;
+       u32 wr_mid = 0;
        int credits;
        u8 buf[150];
+       u64 cntrl1;
        void *pos;
 
        iplen = skb_network_header_len(skb);
@@ -967,7 +1004,7 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb,
        /* packet length = eth hdr len + ip hdr len + tcp hdr len
         * (including options).
         */
-       pktlen = skb->len - skb->data_len;
+       pktlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
 
        ctrl = sizeof(*cpl) + pktlen;
        len16 = DIV_ROUND_UP(sizeof(*wr) + ctrl, 16);
@@ -980,6 +1017,11 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb,
                return NETDEV_TX_BUSY;
        }
 
+       if (unlikely(credits < ETHTXQ_STOP_THRES)) {
+               chcr_eth_txq_stop(q);
+               wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+       }
+
        pos = &q->q.desc[q->q.pidx];
        wr = pos;
 
@@ -987,7 +1029,7 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb,
        wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) |
                               FW_WR_IMMDLEN_V(ctrl));
 
-       wr->equiq_to_len16 = htonl(FW_WR_LEN16_V(len16));
+       wr->equiq_to_len16 = htonl(wr_mid | FW_WR_LEN16_V(len16));
        wr->r3 = 0;
 
        cpl = (void *)(wr + 1);
@@ -997,22 +1039,28 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb,
                           TXPKT_PF_V(tx_info->adap->pf));
        cpl->pack = 0;
        cpl->len = htons(pktlen);
-       /* checksum offload */
-       cpl->ctrl1 = 0;
-
-       pos = cpl + 1;
 
        memcpy(buf, skb->data, pktlen);
        if (tx_info->ip_family == AF_INET) {
                /* we need to correct ip header len */
                ip = (struct iphdr *)(buf + maclen);
                ip->tot_len = htons(pktlen - maclen);
+               cntrl1 = TXPKT_CSUM_TYPE_V(TX_CSUM_TCPIP);
 #if IS_ENABLED(CONFIG_IPV6)
        } else {
                ip6 = (struct ipv6hdr *)(buf + maclen);
                ip6->payload_len = htons(pktlen - maclen - iplen);
+               cntrl1 = TXPKT_CSUM_TYPE_V(TX_CSUM_TCPIP6);
 #endif
        }
+
+       cntrl1 |= T6_TXPKT_ETHHDR_LEN_V(maclen - ETH_HLEN) |
+                 TXPKT_IPHDR_LEN_V(iplen);
+       /* checksum offload */
+       cpl->ctrl1 = cpu_to_be64(cntrl1);
+
+       pos = cpl + 1;
+
        /* now take care of the tcp header, if fin is not set then clear push
         * bit as well, and if fin is set, it will be sent at the last so we
         * need to update the tcp sequence number as per the last packet.
@@ -1031,71 +1079,6 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb,
        return 0;
 }
 
-/* chcr_ktls_skb_shift - Shifts request length paged data from skb to another.
- * @tgt- buffer into which tail data gets added
- * @skb- buffer from which the paged data comes from
- * @shiftlen- shift up to this many bytes
- */
-static int chcr_ktls_skb_shift(struct sk_buff *tgt, struct sk_buff *skb,
-                              int shiftlen)
-{
-       skb_frag_t *fragfrom, *fragto;
-       int from, to, todo;
-
-       WARN_ON(shiftlen > skb->data_len);
-
-       todo = shiftlen;
-       from = 0;
-       to = 0;
-       fragfrom = &skb_shinfo(skb)->frags[from];
-
-       while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) {
-               fragfrom = &skb_shinfo(skb)->frags[from];
-               fragto = &skb_shinfo(tgt)->frags[to];
-
-               if (todo >= skb_frag_size(fragfrom)) {
-                       *fragto = *fragfrom;
-                       todo -= skb_frag_size(fragfrom);
-                       from++;
-                       to++;
-
-               } else {
-                       __skb_frag_ref(fragfrom);
-                       skb_frag_page_copy(fragto, fragfrom);
-                       skb_frag_off_copy(fragto, fragfrom);
-                       skb_frag_size_set(fragto, todo);
-
-                       skb_frag_off_add(fragfrom, todo);
-                       skb_frag_size_sub(fragfrom, todo);
-                       todo = 0;
-
-                       to++;
-                       break;
-               }
-       }
-
-       /* Ready to "commit" this state change to tgt */
-       skb_shinfo(tgt)->nr_frags = to;
-
-       /* Reposition in the original skb */
-       to = 0;
-       while (from < skb_shinfo(skb)->nr_frags)
-               skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++];
-
-       skb_shinfo(skb)->nr_frags = to;
-
-       WARN_ON(todo > 0 && !skb_shinfo(skb)->nr_frags);
-
-       skb->len -= shiftlen;
-       skb->data_len -= shiftlen;
-       skb->truesize -= shiftlen;
-       tgt->len += shiftlen;
-       tgt->data_len += shiftlen;
-       tgt->truesize += shiftlen;
-
-       return shiftlen;
-}
-
 /*
  * chcr_ktls_xmit_wr_complete: This sends out the complete record. If an skb
  * received has partial end part of the record, send out the complete record, so
@@ -1111,6 +1094,8 @@ static int chcr_ktls_skb_shift(struct sk_buff *tgt, struct sk_buff *skb,
 static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb,
                                      struct chcr_ktls_info *tx_info,
                                      struct sge_eth_txq *q, u32 tcp_seq,
+                                     bool is_last_wr, u32 data_len,
+                                     u32 skb_offset, u32 nfrags,
                                      bool tcp_push, u32 mss)
 {
        u32 len16, wr_mid = 0, flits = 0, ndesc, cipher_start;
@@ -1126,7 +1111,7 @@ static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb,
        u64 *end;
 
        /* get the number of flits required */
-       flits = chcr_ktls_get_tx_flits(skb, tx_info->key_ctx_len);
+       flits = chcr_ktls_get_tx_flits(nfrags, tx_info->key_ctx_len);
        /* number of descriptors */
        ndesc = chcr_flits_to_desc(flits);
        /* check if enough credits available */
@@ -1155,6 +1140,9 @@ static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb,
                return NETDEV_TX_BUSY;
        }
 
+       if (!is_last_wr)
+               skb_get(skb);
+
        pos = &q->q.desc[q->q.pidx];
        end = (u64 *)pos + flits;
        /* FW_ULPTX_WR */
@@ -1187,7 +1175,7 @@ static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb,
                      CPL_TX_SEC_PDU_CPLLEN_V(CHCR_CPL_TX_SEC_PDU_LEN_64BIT) |
                      CPL_TX_SEC_PDU_PLACEHOLDER_V(1) |
                      CPL_TX_SEC_PDU_IVINSRTOFST_V(TLS_HEADER_SIZE + 1));
-       cpl->pldlen = htonl(skb->data_len);
+       cpl->pldlen = htonl(data_len);
 
        /* encryption should start after tls header size + iv size */
        cipher_start = TLS_HEADER_SIZE + tx_info->iv_size + 1;
@@ -1229,7 +1217,7 @@ static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb,
        /* CPL_TX_DATA */
        tx_data = (void *)pos;
        OPCODE_TID(tx_data) = htonl(MK_OPCODE_TID(CPL_TX_DATA, tx_info->tid));
-       tx_data->len = htonl(TX_DATA_MSS_V(mss) | TX_LENGTH_V(skb->data_len));
+       tx_data->len = htonl(TX_DATA_MSS_V(mss) | TX_LENGTH_V(data_len));
 
        tx_data->rsvd = htonl(tcp_seq);
 
@@ -1249,8 +1237,8 @@ static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb,
        }
 
        /* send the complete packet except the header */
-       cxgb4_write_sgl(skb, &q->q, pos, end, skb->len - skb->data_len,
-                       sgl_sdesc->addr);
+       cxgb4_write_partial_sgl(skb, &q->q, pos, end, sgl_sdesc->addr,
+                               skb_offset, data_len);
        sgl_sdesc->skb = skb;
 
        chcr_txq_advance(&q->q, ndesc);
@@ -1282,10 +1270,11 @@ static int chcr_ktls_xmit_wr_short(struct sk_buff *skb,
                                   struct sge_eth_txq *q,
                                   u32 tcp_seq, bool tcp_push, u32 mss,
                                   u32 tls_rec_offset, u8 *prior_data,
-                                  u32 prior_data_len)
+                                  u32 prior_data_len, u32 data_len,
+                                  u32 skb_offset)
 {
+       u32 len16, wr_mid = 0, cipher_start, nfrags;
        struct adapter *adap = tx_info->adap;
-       u32 len16, wr_mid = 0, cipher_start;
        unsigned int flits = 0, ndesc;
        int credits, left, last_desc;
        struct tx_sw_desc *sgl_sdesc;
@@ -1298,10 +1287,11 @@ static int chcr_ktls_xmit_wr_short(struct sk_buff *skb,
        void *pos;
        u64 *end;
 
+       nfrags = chcr_get_nfrags_to_send(skb, skb_offset, data_len);
        /* get the number of flits required, it's a partial record so 2 flits
         * (AES_BLOCK_SIZE) will be added.
         */
-       flits = chcr_ktls_get_tx_flits(skb, tx_info->key_ctx_len) + 2;
+       flits = chcr_ktls_get_tx_flits(nfrags, tx_info->key_ctx_len) + 2;
        /* get the correct 8 byte IV of this record */
        iv_record = cpu_to_be64(tx_info->iv + tx_info->record_no);
        /* If it's a middle record and not 16 byte aligned to run AES CTR, need
@@ -1373,7 +1363,7 @@ static int chcr_ktls_xmit_wr_short(struct sk_buff *skb,
                htonl(CPL_TX_SEC_PDU_OPCODE_V(CPL_TX_SEC_PDU) |
                      CPL_TX_SEC_PDU_CPLLEN_V(CHCR_CPL_TX_SEC_PDU_LEN_64BIT) |
                      CPL_TX_SEC_PDU_IVINSRTOFST_V(1));
-       cpl->pldlen = htonl(skb->data_len + AES_BLOCK_LEN + prior_data_len);
+       cpl->pldlen = htonl(data_len + AES_BLOCK_LEN + prior_data_len);
        cpl->aadstart_cipherstop_hi =
                htonl(CPL_TX_SEC_PDU_CIPHERSTART_V(cipher_start));
        cpl->cipherstop_lo_authinsert = 0;
@@ -1404,7 +1394,7 @@ static int chcr_ktls_xmit_wr_short(struct sk_buff *skb,
        tx_data = (void *)pos;
        OPCODE_TID(tx_data) = htonl(MK_OPCODE_TID(CPL_TX_DATA, tx_info->tid));
        tx_data->len = htonl(TX_DATA_MSS_V(mss) |
-                       TX_LENGTH_V(skb->data_len + prior_data_len));
+                            TX_LENGTH_V(data_len + prior_data_len));
        tx_data->rsvd = htonl(tcp_seq);
        tx_data->flags = htonl(TX_BYPASS_F);
        if (tcp_push)
@@ -1437,8 +1427,8 @@ static int chcr_ktls_xmit_wr_short(struct sk_buff *skb,
        if (prior_data_len)
                pos = chcr_copy_to_txd(prior_data, &q->q, pos, 16);
        /* send the complete packet except the header */
-       cxgb4_write_sgl(skb, &q->q, pos, end, skb->len - skb->data_len,
-                       sgl_sdesc->addr);
+       cxgb4_write_partial_sgl(skb, &q->q, pos, end, sgl_sdesc->addr,
+                               skb_offset, data_len);
        sgl_sdesc->skb = skb;
 
        chcr_txq_advance(&q->q, ndesc);
@@ -1466,6 +1456,7 @@ static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info,
                                 struct sk_buff *skb, u32 tcp_seq, u32 mss,
                                 bool tcp_push, struct sge_eth_txq *q,
                                 u32 port_id, u8 *prior_data,
+                                u32 data_len, u32 skb_offset,
                                 u32 prior_data_len)
 {
        int credits, left, len16, last_desc;
@@ -1475,14 +1466,16 @@ static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info,
        struct ulptx_idata *idata;
        struct ulp_txpkt *ulptx;
        struct fw_ulptx_wr *wr;
-       u32 wr_mid = 0;
+       u32 wr_mid = 0, nfrags;
        void *pos;
        u64 *end;
 
        flits = DIV_ROUND_UP(CHCR_PLAIN_TX_DATA_LEN, 8);
-       flits += chcr_sgl_len(skb_shinfo(skb)->nr_frags);
+       nfrags = chcr_get_nfrags_to_send(skb, skb_offset, data_len);
+       flits += chcr_sgl_len(nfrags);
        if (prior_data_len)
                flits += 2;
+
        /* WR will need len16 */
        len16 = DIV_ROUND_UP(flits, 2);
        /* check how many descriptors needed */
@@ -1535,7 +1528,7 @@ static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info,
        tx_data = (struct cpl_tx_data *)(idata + 1);
        OPCODE_TID(tx_data) = htonl(MK_OPCODE_TID(CPL_TX_DATA, tx_info->tid));
        tx_data->len = htonl(TX_DATA_MSS_V(mss) |
-                       TX_LENGTH_V(skb->data_len + prior_data_len));
+                            TX_LENGTH_V(data_len + prior_data_len));
        /* set tcp seq number */
        tx_data->rsvd = htonl(tcp_seq);
        tx_data->flags = htonl(TX_BYPASS_F);
@@ -1559,8 +1552,8 @@ static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info,
                end = pos + left;
        }
        /* send the complete packet including the header */
-       cxgb4_write_sgl(skb, &q->q, pos, end, skb->len - skb->data_len,
-                       sgl_sdesc->addr);
+       cxgb4_write_partial_sgl(skb, &q->q, pos, end, sgl_sdesc->addr,
+                               skb_offset, data_len);
        sgl_sdesc->skb = skb;
 
        chcr_txq_advance(&q->q, ndesc);
@@ -1568,12 +1561,96 @@ static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info,
        return 0;
 }
 
+static int chcr_ktls_tunnel_pkt(struct chcr_ktls_info *tx_info,
+                               struct sk_buff *skb,
+                               struct sge_eth_txq *q)
+{
+       u32 ctrl, iplen, maclen, wr_mid = 0, len16;
+       struct tx_sw_desc *sgl_sdesc;
+       struct fw_eth_tx_pkt_wr *wr;
+       struct cpl_tx_pkt_core *cpl;
+       unsigned int flits, ndesc;
+       int credits, last_desc;
+       u64 cntrl1, *end;
+       void *pos;
+
+       ctrl = sizeof(*cpl);
+       flits = DIV_ROUND_UP(sizeof(*wr) + ctrl, 8);
+
+       flits += chcr_sgl_len(skb_shinfo(skb)->nr_frags + 1);
+       len16 = DIV_ROUND_UP(flits, 2);
+       /* check how many descriptors needed */
+       ndesc = DIV_ROUND_UP(flits, 8);
+
+       credits = chcr_txq_avail(&q->q) - ndesc;
+       if (unlikely(credits < 0)) {
+               chcr_eth_txq_stop(q);
+               return -ENOMEM;
+       }
+
+       if (unlikely(credits < ETHTXQ_STOP_THRES)) {
+               chcr_eth_txq_stop(q);
+               wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+       }
+
+       last_desc = q->q.pidx + ndesc - 1;
+       if (last_desc >= q->q.size)
+               last_desc -= q->q.size;
+       sgl_sdesc = &q->q.sdesc[last_desc];
+
+       if (unlikely(cxgb4_map_skb(tx_info->adap->pdev_dev, skb,
+                                  sgl_sdesc->addr) < 0)) {
+               memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr));
+               q->mapping_err++;
+               return -ENOMEM;
+       }
+
+       iplen = skb_network_header_len(skb);
+       maclen = skb_mac_header_len(skb);
+
+       pos = &q->q.desc[q->q.pidx];
+       end = (u64 *)pos + flits;
+       wr = pos;
+
+       /* Firmware work request header */
+       wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) |
+                              FW_WR_IMMDLEN_V(ctrl));
+
+       wr->equiq_to_len16 = htonl(wr_mid | FW_WR_LEN16_V(len16));
+       wr->r3 = 0;
+
+       cpl = (void *)(wr + 1);
+
+       /* CPL header */
+       cpl->ctrl0 = htonl(TXPKT_OPCODE_V(CPL_TX_PKT) |
+                          TXPKT_INTF_V(tx_info->tx_chan) |
+                          TXPKT_PF_V(tx_info->adap->pf));
+       cpl->pack = 0;
+       cntrl1 = TXPKT_CSUM_TYPE_V(tx_info->ip_family == AF_INET ?
+                                  TX_CSUM_TCPIP : TX_CSUM_TCPIP6);
+       cntrl1 |= T6_TXPKT_ETHHDR_LEN_V(maclen - ETH_HLEN) |
+                 TXPKT_IPHDR_LEN_V(iplen);
+       /* checksum offload */
+       cpl->ctrl1 = cpu_to_be64(cntrl1);
+       cpl->len = htons(skb->len);
+
+       pos = cpl + 1;
+
+       cxgb4_write_sgl(skb, &q->q, pos, end, 0, sgl_sdesc->addr);
+       sgl_sdesc->skb = skb;
+       chcr_txq_advance(&q->q, ndesc);
+       cxgb4_ring_tx_db(tx_info->adap, &q->q, ndesc);
+       return 0;
+}
+
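
chcr_ktls_tunnel_pkt() above is consumed by chcr_ktls_sw_fallback() further down in this patch; condensed, the fallback flow is (identifiers from the patch, error unwinding omitted):

    nskb = tls_encrypt_skb(skb);        /* encrypt the record in software */
    if (nskb && !chcr_ktls_tunnel_pkt(tx_info, nskb, q))
            atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_fallback);
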
 /*
  * chcr_ktls_copy_record_in_skb
  * @nskb - new skb where the frags to be added.
+ * @skb - old skb, to copy socket and destructor details.
  * @record - specific record which has complete 16k record in frags.
  */
 static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb,
+                                        struct sk_buff *skb,
                                         struct tls_record_info *record)
 {
        int i = 0;
@@ -1588,6 +1665,9 @@ static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb,
        nskb->data_len = record->len;
        nskb->len += record->len;
        nskb->truesize += record->len;
+       nskb->sk = skb->sk;
+       nskb->destructor = skb->destructor;
+       refcount_add(nskb->truesize, &nskb->sk->sk_wmem_alloc);
 }
 
 /*
@@ -1659,7 +1739,7 @@ static int chcr_end_part_handler(struct chcr_ktls_info *tx_info,
                                 struct sk_buff *skb,
                                 struct tls_record_info *record,
                                 u32 tcp_seq, int mss, bool tcp_push_no_fin,
-                                struct sge_eth_txq *q,
+                                struct sge_eth_txq *q, u32 skb_offset,
                                 u32 tls_end_offset, bool last_wr)
 {
        struct sk_buff *nskb = NULL;
@@ -1668,30 +1748,37 @@ static int chcr_end_part_handler(struct chcr_ktls_info *tx_info,
                nskb = skb;
                atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_complete_pkts);
        } else {
-               dev_kfree_skb_any(skb);
-
-               nskb = alloc_skb(0, GFP_KERNEL);
-               if (!nskb)
+               nskb = alloc_skb(0, GFP_ATOMIC);
+               if (!nskb) {
+                       dev_kfree_skb_any(skb);
                        return NETDEV_TX_BUSY;
+               }
+
                /* copy complete record in skb */
-               chcr_ktls_copy_record_in_skb(nskb, record);
+               chcr_ktls_copy_record_in_skb(nskb, skb, record);
                /* packet is being sent from the beginning, update the tcp_seq
                 * accordingly.
                 */
                tcp_seq = tls_record_start_seq(record);
-               /* reset snd una, so the middle record won't send the already
-                * sent part.
-                */
-               if (chcr_ktls_update_snd_una(tx_info, q))
-                       goto out;
+               /* reset skb offset */
+               skb_offset = 0;
+
+               if (last_wr)
+                       dev_kfree_skb_any(skb);
+
+               last_wr = true;
+
                atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_end_pkts);
        }
 
        if (chcr_ktls_xmit_wr_complete(nskb, tx_info, q, tcp_seq,
+                                      last_wr, record->len, skb_offset,
+                                      record->num_frags,
                                       (last_wr && tcp_push_no_fin),
                                       mss)) {
                goto out;
        }
+       tx_info->prev_seq = record->end_seq;
        return 0;
 out:
        dev_kfree_skb_any(nskb);
@@ -1723,41 +1810,47 @@ static int chcr_short_record_handler(struct chcr_ktls_info *tx_info,
                                     struct sk_buff *skb,
                                     struct tls_record_info *record,
                                     u32 tcp_seq, int mss, bool tcp_push_no_fin,
+                                    u32 data_len, u32 skb_offset,
                                     struct sge_eth_txq *q, u32 tls_end_offset)
 {
        u32 tls_rec_offset = tcp_seq - tls_record_start_seq(record);
        u8 prior_data[16] = {0};
        u32 prior_data_len = 0;
-       u32 data_len;
 
        /* check if the skb is ending in middle of tag/HASH, its a big
         * trouble, send the packet before the HASH.
         */
-       int remaining_record = tls_end_offset - skb->data_len;
+       int remaining_record = tls_end_offset - data_len;
 
        if (remaining_record > 0 &&
            remaining_record < TLS_CIPHER_AES_GCM_128_TAG_SIZE) {
-               int trimmed_len = skb->data_len -
-                       (TLS_CIPHER_AES_GCM_128_TAG_SIZE - remaining_record);
-               struct sk_buff *tmp_skb = NULL;
-               /* don't process the pkt if it is only a partial tag */
-               if (skb->data_len < TLS_CIPHER_AES_GCM_128_TAG_SIZE)
-                       goto out;
+               int trimmed_len = 0;
 
-               WARN_ON(trimmed_len > skb->data_len);
+               if (tls_end_offset > TLS_CIPHER_AES_GCM_128_TAG_SIZE)
+                       trimmed_len = data_len -
+                                     (TLS_CIPHER_AES_GCM_128_TAG_SIZE -
+                                      remaining_record);
+               if (!trimmed_len)
+                       return FALLBACK;
 
-               /* shift to those many bytes */
-               tmp_skb = alloc_skb(0, GFP_KERNEL);
-               if (unlikely(!tmp_skb))
-                       goto out;
+               WARN_ON(trimmed_len > data_len);
 
-               chcr_ktls_skb_shift(tmp_skb, skb, trimmed_len);
-               /* free the last trimmed portion */
-               dev_kfree_skb_any(skb);
-               skb = tmp_skb;
+               data_len = trimmed_len;
                atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_trimmed_pkts);
        }
-       data_len = skb->data_len;
+
+       /* check if it is only the header part. */
+       if (tls_rec_offset + data_len <= (TLS_HEADER_SIZE + tx_info->iv_size)) {
+               if (chcr_ktls_tx_plaintxt(tx_info, skb, tcp_seq, mss,
+                                         tcp_push_no_fin, q,
+                                         tx_info->port_id, prior_data,
+                                         data_len, skb_offset, prior_data_len))
+                       goto out;
+
+               tx_info->prev_seq = tcp_seq + data_len;
+               return 0;
+       }
+
        /* check if the middle record's start point is 16 byte aligned. CTR
         * needs 16 byte aligned start point to start encryption.
         */
@@ -1818,9 +1911,6 @@ static int chcr_short_record_handler(struct chcr_ktls_info *tx_info,
                        }
                        /* reset tcp_seq as per the prior_data_required len */
                        tcp_seq -= prior_data_len;
-                       /* include prio_data_len for  further calculation.
-                        */
-                       data_len += prior_data_len;
                }
                /* reset snd una, so the middle record won't send the already
                 * sent part.
@@ -1829,37 +1919,54 @@ static int chcr_short_record_handler(struct chcr_ktls_info *tx_info,
                        goto out;
                atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_middle_pkts);
        } else {
-               /* Else means, its a partial first part of the record. Check if
-                * its only the header, don't need to send for encryption then.
-                */
-               if (data_len <= TLS_HEADER_SIZE + tx_info->iv_size) {
-                       if (chcr_ktls_tx_plaintxt(tx_info, skb, tcp_seq, mss,
-                                                 tcp_push_no_fin, q,
-                                                 tx_info->port_id,
-                                                 prior_data,
-                                                 prior_data_len)) {
-                               goto out;
-                       }
-                       return 0;
-               }
                atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_start_pkts);
        }
 
        if (chcr_ktls_xmit_wr_short(skb, tx_info, q, tcp_seq, tcp_push_no_fin,
                                    mss, tls_rec_offset, prior_data,
-                                   prior_data_len)) {
+                                   prior_data_len, data_len, skb_offset)) {
                goto out;
        }
 
+       tx_info->prev_seq = tcp_seq + data_len + prior_data_len;
        return 0;
 out:
        dev_kfree_skb_any(skb);
        return NETDEV_TX_BUSY;
 }
 
+static int chcr_ktls_sw_fallback(struct sk_buff *skb,
+                                struct chcr_ktls_info *tx_info,
+                                struct sge_eth_txq *q)
+{
+       u32 data_len, skb_offset;
+       struct sk_buff *nskb;
+       struct tcphdr *th;
+
+       nskb = tls_encrypt_skb(skb);
+
+       if (!nskb)
+               return 0;
+
+       th = tcp_hdr(nskb);
+       skb_offset =  skb_transport_offset(nskb) + tcp_hdrlen(nskb);
+       data_len = nskb->len - skb_offset;
+       skb_tx_timestamp(nskb);
+
+       if (chcr_ktls_tunnel_pkt(tx_info, nskb, q))
+               goto out;
+
+       tx_info->prev_seq = ntohl(th->seq) + data_len;
+       atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_fallback);
+       return 0;
+out:
+       dev_kfree_skb_any(nskb);
+       return 0;
+}
 /* nic tls TX handler */
 static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev)
 {
+       u32 tls_end_offset, tcp_seq, skb_data_len, skb_offset;
        struct ch_ktls_port_stats_debug *port_stats;
        struct chcr_ktls_ofld_ctx_tx *tx_ctx;
        struct ch_ktls_stats_debug *stats;
@@ -1867,20 +1974,17 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev)
        int data_len, qidx, ret = 0, mss;
        struct tls_record_info *record;
        struct chcr_ktls_info *tx_info;
-       u32 tls_end_offset, tcp_seq;
        struct tls_context *tls_ctx;
-       struct sk_buff *local_skb;
        struct sge_eth_txq *q;
        struct adapter *adap;
        unsigned long flags;
 
        tcp_seq = ntohl(th->seq);
+       skb_offset = skb_transport_offset(skb) + tcp_hdrlen(skb);
+       skb_data_len = skb->len - skb_offset;
+       data_len = skb_data_len;
 
-       mss = skb_is_gso(skb) ? skb_shinfo(skb)->gso_size : skb->data_len;
-
-       /* check if we haven't set it for ktls offload */
-       if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))
-               goto out;
+       mss = skb_is_gso(skb) ? skb_shinfo(skb)->gso_size : data_len;
 
        tls_ctx = tls_get_ctx(skb->sk);
        if (unlikely(tls_ctx->netdev != dev))
@@ -1892,14 +1996,6 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev)
        if (unlikely(!tx_info))
                goto out;
 
-       /* don't touch the original skb, make a new skb to extract each records
-        * and send them separately.
-        */
-       local_skb = alloc_skb(0, GFP_KERNEL);
-
-       if (unlikely(!local_skb))
-               return NETDEV_TX_BUSY;
-
        adap = tx_info->adap;
        stats = &adap->ch_ktls_stats;
        port_stats = &stats->ktls_port[tx_info->port_id];
@@ -1914,20 +2010,7 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev)
                if (ret)
                        return NETDEV_TX_BUSY;
        }
-       /* update tcb */
-       ret = chcr_ktls_xmit_tcb_cpls(tx_info, q, ntohl(th->seq),
-                                     ntohl(th->ack_seq),
-                                     ntohs(th->window));
-       if (ret) {
-               dev_kfree_skb_any(local_skb);
-               return NETDEV_TX_BUSY;
-       }
 
-       /* copy skb contents into local skb */
-       chcr_ktls_skb_copy(skb, local_skb);
-
-       /* go through the skb and send only one record at a time. */
-       data_len = skb->data_len;
        /* TCP segments can be in received either complete or partial.
         * chcr_end_part_handler will handle cases if complete record or end
         * part of the record is received. Incase of partial end part of record,
@@ -1952,10 +2035,64 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev)
                        goto out;
                }
 
+               tls_end_offset = record->end_seq - tcp_seq;
+
+               pr_debug("seq 0x%x, end_seq 0x%x prev_seq 0x%x, datalen 0x%x\n",
+                        tcp_seq, record->end_seq, tx_info->prev_seq, data_len);
+               /* update tcb for the skb */
+               if (skb_data_len == data_len) {
+                       u32 tx_max = tcp_seq;
+
+                       if (!tls_record_is_start_marker(record) &&
+                           tls_end_offset < TLS_CIPHER_AES_GCM_128_TAG_SIZE)
+                               tx_max = record->end_seq -
+                                       TLS_CIPHER_AES_GCM_128_TAG_SIZE;
+
+                       ret = chcr_ktls_xmit_tcb_cpls(tx_info, q, tx_max,
+                                                     ntohl(th->ack_seq),
+                                                     ntohs(th->window),
+                                                     tls_end_offset !=
+                                                     record->len);
+                       if (ret) {
+                               spin_unlock_irqrestore(&tx_ctx->base.lock,
+                                                      flags);
+                               goto out;
+                       }
+
+                       if (th->fin)
+                               skb_get(skb);
+               }
+
                if (unlikely(tls_record_is_start_marker(record))) {
-                       spin_unlock_irqrestore(&tx_ctx->base.lock, flags);
                        atomic64_inc(&port_stats->ktls_tx_skip_no_sync_data);
-                       goto out;
+                       /* If tls_end_offset < data_len, there is some data
+                        * after the start marker which needs encryption; send
+                        * the plaintext first and take an skb refcount. Else
+                        * send out the complete pkt as plaintext.
+                        */
+                       if (tls_end_offset < data_len)
+                               skb_get(skb);
+                       else
+                               tls_end_offset = data_len;
+
+                       ret = chcr_ktls_tx_plaintxt(tx_info, skb, tcp_seq, mss,
+                                                   (!th->fin && th->psh), q,
+                                                   tx_info->port_id, NULL,
+                                                   tls_end_offset, skb_offset,
+                                                   0);
+
+                       spin_unlock_irqrestore(&tx_ctx->base.lock, flags);
+                       if (ret) {
+                               /* release the refcount taken earlier */
+                               if (tls_end_offset < data_len)
+                                       dev_kfree_skb_any(skb);
+                               goto out;
+                       }
+
+                       data_len -= tls_end_offset;
+                       tcp_seq = record->end_seq;
+                       skb_offset += tls_end_offset;
+                       continue;
                }
 
                /* increase page reference count of the record, so that there
@@ -1967,73 +2104,64 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev)
                /* lock cleared */
                spin_unlock_irqrestore(&tx_ctx->base.lock, flags);
 
-               tls_end_offset = record->end_seq - tcp_seq;
 
-               pr_debug("seq 0x%x, end_seq 0x%x prev_seq 0x%x, datalen 0x%x\n",
-                        tcp_seq, record->end_seq, tx_info->prev_seq, data_len);
                /* if a tls record is finishing in this SKB */
                if (tls_end_offset <= data_len) {
-                       struct sk_buff *nskb = NULL;
-
-                       if (tls_end_offset < data_len) {
-                               nskb = alloc_skb(0, GFP_KERNEL);
-                               if (unlikely(!nskb)) {
-                                       ret = -ENOMEM;
-                                       goto clear_ref;
-                               }
-
-                               chcr_ktls_skb_shift(nskb, local_skb,
-                                                   tls_end_offset);
-                       } else {
-                               /* its the only record in this skb, directly
-                                * point it.
-                                */
-                               nskb = local_skb;
-                       }
-                       ret = chcr_end_part_handler(tx_info, nskb, record,
+                       ret = chcr_end_part_handler(tx_info, skb, record,
                                                    tcp_seq, mss,
                                                    (!th->fin && th->psh), q,
+                                                   skb_offset,
                                                    tls_end_offset,
-                                                   (nskb == local_skb));
-
-                       if (ret && nskb != local_skb)
-                               dev_kfree_skb_any(local_skb);
+                                                   skb_offset +
+                                                   tls_end_offset == skb->len);
 
                        data_len -= tls_end_offset;
                        /* tcp_seq increment is required to handle next record.
                         */
                        tcp_seq += tls_end_offset;
+                       skb_offset += tls_end_offset;
                } else {
-                       ret = chcr_short_record_handler(tx_info, local_skb,
+                       ret = chcr_short_record_handler(tx_info, skb,
                                                        record, tcp_seq, mss,
                                                        (!th->fin && th->psh),
+                                                       data_len, skb_offset,
                                                        q, tls_end_offset);
                        data_len = 0;
                }
-clear_ref:
+
                /* clear the frag ref count which increased locally before */
                for (i = 0; i < record->num_frags; i++) {
                        /* clear the frag ref count */
                        __skb_frag_unref(&record->frags[i]);
                }
                /* if any failure, come out from the loop. */
-               if (ret)
-                       goto out;
+               if (ret) {
+                       if (th->fin)
+                               dev_kfree_skb_any(skb);
+
+                       if (ret == FALLBACK)
+                               return chcr_ktls_sw_fallback(skb, tx_info, q);
+
+                       return NETDEV_TX_OK;
+               }
+
                /* length should never be less than 0 */
                WARN_ON(data_len < 0);
 
        } while (data_len > 0);
 
-       tx_info->prev_seq = ntohl(th->seq) + skb->data_len;
        atomic64_inc(&port_stats->ktls_tx_encrypted_packets);
-       atomic64_add(skb->data_len, &port_stats->ktls_tx_encrypted_bytes);
+       atomic64_add(skb_data_len, &port_stats->ktls_tx_encrypted_bytes);
 
        /* tcp finish is set, send a separate tcp msg including all the options
         * as well.
         */
-       if (th->fin)
+       if (th->fin) {
                chcr_ktls_write_tcp_options(tx_info, skb, q, tx_info->tx_chan);
+               dev_kfree_skb_any(skb);
+       }
 
+       return NETDEV_TX_OK;
 out:
        dev_kfree_skb_any(skb);
        return NETDEV_TX_OK;
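
The reworked chcr_ktls_xmit() above walks the skb record by record using pure sequence arithmetic: tls_end_offset = record->end_seq - tcp_seq is how much of the current record remains, and tcp_seq, skb_offset and data_len advance in lockstep. A minimal userspace sketch of just that walk (the sequence numbers, offsets and record boundaries below are invented for illustration):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint32_t tcp_seq = 1000;          /* seq of first payload byte */
    uint32_t data_len = 500;          /* payload bytes in this skb */
    uint32_t skb_offset = 54;         /* headers precede the payload */
    const uint32_t record_end[] = { 1100, 1400, 2000 }; /* hypothetical */
    unsigned int i = 0;

    while (data_len > 0) {
        uint32_t tls_end_offset = record_end[i] - tcp_seq;

        if (tls_end_offset <= data_len) {
            /* record finishes inside this skb: send its end part */
            printf("end part: seq %u, %u bytes at offset %u\n",
                   tcp_seq, tls_end_offset, skb_offset);
            tcp_seq += tls_end_offset;
            skb_offset += tls_end_offset;
            data_len -= tls_end_offset;
            i++;
        } else {
            /* skb ends inside the record: middle/short part */
            printf("short part: seq %u, %u bytes at offset %u\n",
                   tcp_seq, data_len, skb_offset);
            data_len = 0;
        }
    }
    return 0;
}
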
index c1651b1..18b3b1f 100644 (file)
@@ -26,6 +26,7 @@
 
 #define CHCR_KTLS_WR_SIZE      (CHCR_PLAIN_TX_DATA_LEN +\
                                 sizeof(struct cpl_tx_sec_pdu))
+#define FALLBACK               35
 
 enum ch_ktls_open_state {
        CH_KTLS_OPEN_SUCCESS = 0,
index c96e2f2..4919d22 100644 (file)
@@ -2713,6 +2713,10 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
                                spin_unlock_bh(&vsi->mac_filter_hash_lock);
                                goto error_param;
                        }
+                       if (is_valid_ether_addr(al->list[i].addr) &&
+                           is_zero_ether_addr(vf->default_lan_addr.addr))
+                               ether_addr_copy(vf->default_lan_addr.addr,
+                                               al->list[i].addr);
                }
        }
        spin_unlock_bh(&vsi->mac_filter_hash_lock);
@@ -2740,6 +2744,7 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
 {
        struct virtchnl_ether_addr_list *al =
            (struct virtchnl_ether_addr_list *)msg;
+       bool was_unimac_deleted = false;
        struct i40e_pf *pf = vf->pf;
        struct i40e_vsi *vsi = NULL;
        i40e_status ret = 0;
@@ -2759,6 +2764,8 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
                        ret = I40E_ERR_INVALID_MAC_ADDR;
                        goto error_param;
                }
+               if (ether_addr_equal(al->list[i].addr, vf->default_lan_addr.addr))
+                       was_unimac_deleted = true;
        }
        vsi = pf->vsi[vf->lan_vsi_idx];
 
@@ -2779,10 +2786,25 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
                dev_err(&pf->pdev->dev, "Unable to program VF %d MAC filters, error %d\n",
                        vf->vf_id, ret);
 
+       if (vf->trusted && was_unimac_deleted) {
+               struct i40e_mac_filter *f;
+               struct hlist_node *h;
+               u8 *macaddr = NULL;
+               int bkt;
+
+               /* set last unicast mac address as default */
+               spin_lock_bh(&vsi->mac_filter_hash_lock);
+               hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+                       if (is_valid_ether_addr(f->macaddr))
+                               macaddr = f->macaddr;
+               }
+               if (macaddr)
+                       ether_addr_copy(vf->default_lan_addr.addr, macaddr);
+               spin_unlock_bh(&vsi->mac_filter_hash_lock);
+       }
 error_param:
        /* send the response to the VF */
-       return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DEL_ETH_ADDR,
-                                      ret);
+       return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DEL_ETH_ADDR, ret);
 }
 
 /**
index 6acede0..567fd67 100644 (file)
@@ -281,8 +281,8 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
        unsigned int total_rx_bytes = 0, total_rx_packets = 0;
        u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
        unsigned int xdp_res, xdp_xmit = 0;
+       bool failure = false;
        struct sk_buff *skb;
-       bool failure;
 
        while (likely(total_rx_packets < (unsigned int)budget)) {
                union i40e_rx_desc *rx_desc;
index 9112dff..b673ac1 100644 (file)
@@ -3891,21 +3891,23 @@ static int igc_change_mtu(struct net_device *netdev, int new_mtu)
 }
 
 /**
- * igc_get_stats - Get System Network Statistics
+ * igc_get_stats64 - Get System Network Statistics
  * @netdev: network interface device structure
+ * @stats: rtnl_link_stats64 pointer
  *
  * The statistics are updated here and also from the timer callback.
  */
-static struct net_device_stats *igc_get_stats(struct net_device *netdev)
+static void igc_get_stats64(struct net_device *netdev,
+                           struct rtnl_link_stats64 *stats)
 {
        struct igc_adapter *adapter = netdev_priv(netdev);
 
+       spin_lock(&adapter->stats64_lock);
        if (!test_bit(__IGC_RESETTING, &adapter->state))
                igc_update_stats(adapter);
-
-       /* only return the current stats */
-       return &netdev->stats;
+       memcpy(stats, &adapter->stats64, sizeof(*stats));
+       spin_unlock(&adapter->stats64_lock);
 }
 
 static netdev_features_t igc_fix_features(struct net_device *netdev,
@@ -4855,7 +4857,7 @@ static const struct net_device_ops igc_netdev_ops = {
        .ndo_set_rx_mode        = igc_set_rx_mode,
        .ndo_set_mac_address    = igc_set_mac,
        .ndo_change_mtu         = igc_change_mtu,
-       .ndo_get_stats          = igc_get_stats,
+       .ndo_get_stats64        = igc_get_stats64,
        .ndo_fix_features       = igc_fix_features,
        .ndo_set_features       = igc_set_features,
        .ndo_features_check     = igc_features_check,
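
The igc fix moves from the legacy .ndo_get_stats hook, which handed back a pointer into live device state, to .ndo_get_stats64, which copies a consistent snapshot into the caller's buffer under stats64_lock. A toy userspace analogue of that copy-under-lock pattern (struct and names invented here):

#include <pthread.h>
#include <stdio.h>
#include <string.h>

struct dev_stats { unsigned long rx_packets, tx_packets; };

static struct dev_stats stats;                  /* updated elsewhere */
static pthread_mutex_t stats_lock = PTHREAD_MUTEX_INITIALIZER;

/* Copy a consistent snapshot instead of handing out a live pointer. */
static void get_stats64(struct dev_stats *out)
{
    pthread_mutex_lock(&stats_lock);
    /* a real driver would refresh hardware counters here */
    memcpy(out, &stats, sizeof(*out));
    pthread_mutex_unlock(&stats_lock);
}

int main(void)
{
    struct dev_stats snap;

    get_stats64(&snap);
    printf("rx %lu tx %lu\n", snap.rx_packets, snap.tx_packets);
    return 0;
}
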
index b1fcc44..b6f20e2 100644 (file)
@@ -6,6 +6,7 @@
 config PRESTERA
        tristate "Marvell Prestera Switch ASICs support"
        depends on NET_SWITCHDEV && VLAN_8021Q
+       depends on BRIDGE || BRIDGE=n
        select NET_DEVLINK
        help
          This driver supports the Marvell Prestera Switch ASICs family.
index e36e505..d29af7b 100644 (file)
@@ -107,12 +107,16 @@ void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
                mlx5e_tc_encap_flows_del(priv, e, &flow_list);
 
        if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
+               struct net_device *route_dev;
+
                ether_addr_copy(e->h_dest, ha);
                ether_addr_copy(eth->h_dest, ha);
                /* Update the encap source mac, in case we delete
                 * the flows when the encap source mac changes.
                 */
-               ether_addr_copy(eth->h_source, e->route_dev->dev_addr);
+               route_dev = __dev_get_by_index(dev_net(priv->netdev), e->route_dev_ifindex);
+               if (route_dev)
+                       ether_addr_copy(eth->h_source, route_dev->dev_addr);
 
                mlx5e_tc_encap_flows_add(priv, e, &flow_list);
        }
index 7cce85f..90930e5 100644 (file)
@@ -77,13 +77,13 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv,
        return 0;
 }
 
-static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
-                                  struct net_device *mirred_dev,
-                                  struct net_device **out_dev,
-                                  struct net_device **route_dev,
-                                  struct flowi4 *fl4,
-                                  struct neighbour **out_n,
-                                  u8 *out_ttl)
+static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv,
+                                      struct net_device *mirred_dev,
+                                      struct net_device **out_dev,
+                                      struct net_device **route_dev,
+                                      struct flowi4 *fl4,
+                                      struct neighbour **out_n,
+                                      u8 *out_ttl)
 {
        struct neighbour *n;
        struct rtable *rt;
@@ -117,18 +117,28 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
                ip_rt_put(rt);
                return ret;
        }
+       dev_hold(*route_dev);
 
        if (!(*out_ttl))
                *out_ttl = ip4_dst_hoplimit(&rt->dst);
        n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
        ip_rt_put(rt);
-       if (!n)
+       if (!n) {
+               dev_put(*route_dev);
                return -ENOMEM;
+       }
 
        *out_n = n;
        return 0;
 }
 
+static void mlx5e_route_lookup_ipv4_put(struct net_device *route_dev,
+                                       struct neighbour *n)
+{
+       neigh_release(n);
+       dev_put(route_dev);
+}
+
 static const char *mlx5e_netdev_kind(struct net_device *dev)
 {
        if (dev->rtnl_link_ops)
@@ -193,8 +203,8 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
        fl4.saddr = tun_key->u.ipv4.src;
        ttl = tun_key->ttl;
 
-       err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &route_dev,
-                                     &fl4, &n, &ttl);
+       err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &out_dev, &route_dev,
+                                         &fl4, &n, &ttl);
        if (err)
                return err;
 
@@ -223,7 +233,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
        e->m_neigh.family = n->ops->family;
        memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
        e->out_dev = out_dev;
-       e->route_dev = route_dev;
+       e->route_dev_ifindex = route_dev->ifindex;
 
        /* It's important to add the neigh to the hash table before checking
         * the neigh validity state. So if we'll get a notification, in case the
@@ -278,7 +288,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
 
        e->flags |= MLX5_ENCAP_ENTRY_VALID;
        mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
-       neigh_release(n);
+       mlx5e_route_lookup_ipv4_put(route_dev, n);
        return err;
 
 destroy_neigh_entry:
@@ -286,18 +296,18 @@ destroy_neigh_entry:
 free_encap:
        kfree(encap_header);
 release_neigh:
-       neigh_release(n);
+       mlx5e_route_lookup_ipv4_put(route_dev, n);
        return err;
 }
 
 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
-static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
-                                  struct net_device *mirred_dev,
-                                  struct net_device **out_dev,
-                                  struct net_device **route_dev,
-                                  struct flowi6 *fl6,
-                                  struct neighbour **out_n,
-                                  u8 *out_ttl)
+static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv,
+                                      struct net_device *mirred_dev,
+                                      struct net_device **out_dev,
+                                      struct net_device **route_dev,
+                                      struct flowi6 *fl6,
+                                      struct neighbour **out_n,
+                                      u8 *out_ttl)
 {
        struct dst_entry *dst;
        struct neighbour *n;
@@ -318,15 +328,25 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
                return ret;
        }
 
+       dev_hold(*route_dev);
        n = dst_neigh_lookup(dst, &fl6->daddr);
        dst_release(dst);
-       if (!n)
+       if (!n) {
+               dev_put(*route_dev);
                return -ENOMEM;
+       }
 
        *out_n = n;
        return 0;
 }
 
+static void mlx5e_route_lookup_ipv6_put(struct net_device *route_dev,
+                                       struct neighbour *n)
+{
+       neigh_release(n);
+       dev_put(route_dev);
+}
+
 int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
                                    struct net_device *mirred_dev,
                                    struct mlx5e_encap_entry *e)
@@ -348,8 +368,8 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
        fl6.daddr = tun_key->u.ipv6.dst;
        fl6.saddr = tun_key->u.ipv6.src;
 
-       err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &route_dev,
-                                     &fl6, &n, &ttl);
+       err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &out_dev, &route_dev,
+                                         &fl6, &n, &ttl);
        if (err)
                return err;
 
@@ -378,7 +398,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
        e->m_neigh.family = n->ops->family;
        memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
        e->out_dev = out_dev;
-       e->route_dev = route_dev;
+       e->route_dev_ifindex = route_dev->ifindex;
 
        /* It's important to add the neigh to the hash table before checking
         * the neigh validity state. So if we get a notification, in case the
@@ -433,7 +453,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
 
        e->flags |= MLX5_ENCAP_ENTRY_VALID;
        mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
-       neigh_release(n);
+       mlx5e_route_lookup_ipv6_put(route_dev, n);
        return err;
 
 destroy_neigh_entry:
@@ -441,7 +461,7 @@ destroy_neigh_entry:
 free_encap:
        kfree(encap_header);
 release_neigh:
-       neigh_release(n);
+       mlx5e_route_lookup_ipv6_put(route_dev, n);
        return err;
 }
 #endif
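
The renamed _get/_put helpers above make the new dev_hold()/dev_put() pairing explicit: a successful lookup now returns route_dev with a reference held, and every caller path, including the error labels, releases it through the matching _put. A toy sketch of the paired-helper convention (refcount modelled as a plain counter, all names invented):

#include <stdio.h>

struct dev { int refcnt; const char *name; };

static void dev_hold_toy(struct dev *d) { d->refcnt++; }
static void dev_put_toy(struct dev *d)  { d->refcnt--; }

/* _get: on success the caller owns one reference on *out. */
static int route_lookup_get(struct dev *candidate, struct dev **out)
{
    dev_hold_toy(candidate);
    *out = candidate;
    return 0;
}

/* _put: releases exactly what the matching _get acquired. */
static void route_lookup_put(struct dev *d)
{
    dev_put_toy(d);
}

int main(void)
{
    struct dev eth0 = { 1, "eth0" };
    struct dev *route_dev;

    if (route_lookup_get(&eth0, &route_dev) == 0) {
        printf("%s refcnt %d\n", route_dev->name, route_dev->refcnt);
        route_lookup_put(route_dev);
    }
    printf("%s refcnt %d\n", eth0.name, eth0.refcnt);
    return 0;
}
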
index 4e574ac..be3465b 100644 (file)
@@ -122,9 +122,9 @@ void mlx5e_activate_xsk(struct mlx5e_channel *c)
        set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
        /* TX queue is created active. */
 
-       spin_lock(&c->async_icosq_lock);
+       spin_lock_bh(&c->async_icosq_lock);
        mlx5e_trigger_irq(&c->async_icosq);
-       spin_unlock(&c->async_icosq_lock);
+       spin_unlock_bh(&c->async_icosq_lock);
 }
 
 void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
index fb671a4..8e96260 100644 (file)
@@ -36,9 +36,9 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
                if (test_and_set_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state))
                        return 0;
 
-               spin_lock(&c->async_icosq_lock);
+               spin_lock_bh(&c->async_icosq_lock);
                mlx5e_trigger_irq(&c->async_icosq);
-               spin_unlock(&c->async_icosq_lock);
+               spin_unlock_bh(&c->async_icosq_lock);
        }
 
        return 0;
index ccaccb9..7f6221b 100644 (file)
@@ -188,7 +188,7 @@ static int post_rx_param_wqes(struct mlx5e_channel *c,
 
        err = 0;
        sq = &c->async_icosq;
-       spin_lock(&c->async_icosq_lock);
+       spin_lock_bh(&c->async_icosq_lock);
 
        cseg = post_static_params(sq, priv_rx);
        if (IS_ERR(cseg))
@@ -199,7 +199,7 @@ static int post_rx_param_wqes(struct mlx5e_channel *c,
 
        mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
 unlock:
-       spin_unlock(&c->async_icosq_lock);
+       spin_unlock_bh(&c->async_icosq_lock);
 
        return err;
 
@@ -265,10 +265,10 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq,
 
        BUILD_BUG_ON(MLX5E_KTLS_GET_PROGRESS_WQEBBS != 1);
 
-       spin_lock(&sq->channel->async_icosq_lock);
+       spin_lock_bh(&sq->channel->async_icosq_lock);
 
        if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) {
-               spin_unlock(&sq->channel->async_icosq_lock);
+               spin_unlock_bh(&sq->channel->async_icosq_lock);
                err = -ENOSPC;
                goto err_dma_unmap;
        }
@@ -299,7 +299,7 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq,
        icosq_fill_wi(sq, pi, &wi);
        sq->pc++;
        mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
-       spin_unlock(&sq->channel->async_icosq_lock);
+       spin_unlock_bh(&sq->channel->async_icosq_lock);
 
        return 0;
 
@@ -360,7 +360,7 @@ static int resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_rx
        err = 0;
 
        sq = &c->async_icosq;
-       spin_lock(&c->async_icosq_lock);
+       spin_lock_bh(&c->async_icosq_lock);
 
        cseg = post_static_params(sq, priv_rx);
        if (IS_ERR(cseg)) {
@@ -372,7 +372,7 @@ static int resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_rx
        mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
        priv_rx->stats->tls_resync_res_ok++;
 unlock:
-       spin_unlock(&c->async_icosq_lock);
+       spin_unlock_bh(&c->async_icosq_lock);
 
        return err;
 }
index b3f02aa..ebce979 100644 (file)
@@ -5253,6 +5253,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
 
        mlx5e_disable_async_events(priv);
        mlx5_lag_remove(mdev);
+       mlx5_vxlan_reset_to_default(mdev->vxlan);
 }
 
 int mlx5e_update_nic_rx(struct mlx5e_priv *priv)
index 9020d14..8932c38 100644 (file)
@@ -186,7 +186,7 @@ struct mlx5e_encap_entry {
        unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
 
        struct net_device *out_dev;
-       struct net_device *route_dev;
+       int route_dev_ifindex;
        struct mlx5e_tc_tunnel *tunnel;
        int reformat_type;
        u8 flags;
index 599f5b5..6628a01 100644 (file)
@@ -1584,7 +1584,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
        } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq)));
 
 out:
-       if (rq->xdp_prog)
+       if (rcu_access_pointer(rq->xdp_prog))
                mlx5e_xdp_rx_poll_complete(rq);
 
        mlx5_cqwq_update_db_record(cqwq);
index e3a968e..2e2fa04 100644 (file)
@@ -4658,6 +4658,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
        return flow;
 
 err_free:
+       dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
        mlx5e_flow_put(priv, flow);
 out:
        return ERR_PTR(err);
@@ -4802,6 +4803,7 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv,
        return 0;
 
 err_free:
+       dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
        mlx5e_flow_put(priv, flow);
 out:
        return err;
index 6e6a9a5..e8e6294 100644 (file)
@@ -1902,8 +1902,6 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink,
                ether_addr_copy(hw_addr, vport->info.mac);
                *hw_addr_len = ETH_ALEN;
                err = 0;
-       } else {
-               NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled");
        }
        mutex_unlock(&esw->state_lock);
        return err;
index 1609183..325a5b0 100644 (file)
@@ -2010,10 +2010,11 @@ void mlx5_del_flow_rules(struct mlx5_flow_handle *handle)
        down_write_ref_node(&fte->node, false);
        for (i = handle->num_rules - 1; i >= 0; i--)
                tree_remove_node(&handle->rule[i]->node, true);
-       if (fte->modify_mask && fte->dests_size) {
-               modify_fte(fte);
+       if (fte->dests_size) {
+               if (fte->modify_mask)
+                       modify_fte(fte);
                up_write_ref_node(&fte->node, false);
-       } else {
+       } else if (list_empty(&fte->node.children)) {
                del_hw_fte(&fte->node);
                /* Avoid double call to del_hw_fte */
                fte->node.del_hw_func = NULL;
index 3315afe..3808440 100644 (file)
@@ -167,6 +167,17 @@ struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev)
 }
 
 void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan)
+{
+       if (!mlx5_vxlan_allowed(vxlan))
+               return;
+
+       mlx5_vxlan_del_port(vxlan, IANA_VXLAN_UDP_PORT);
+       WARN_ON(!hash_empty(vxlan->htable));
+
+       kfree(vxlan);
+}
+
+void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan)
 {
        struct mlx5_vxlan_port *vxlanp;
        struct hlist_node *tmp;
@@ -175,12 +186,12 @@ void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan)
        if (!mlx5_vxlan_allowed(vxlan))
                return;
 
-       /* Lockless since we are the only hash table consumers*/
        hash_for_each_safe(vxlan->htable, bkt, tmp, vxlanp, hlist) {
-               hash_del(&vxlanp->hlist);
-               mlx5_vxlan_core_del_port_cmd(vxlan->mdev, vxlanp->udp_port);
-               kfree(vxlanp);
+               /* Don't delete the default UDP port added by the HW.
+                * Remove only user-configured ports.
+                */
+               if (vxlanp->udp_port == IANA_VXLAN_UDP_PORT)
+                       continue;
+               mlx5_vxlan_del_port(vxlan, vxlanp->udp_port);
        }
-
-       kfree(vxlan);
 }
index ec76652..34ef662 100644 (file)
@@ -56,6 +56,7 @@ void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan);
 int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port);
 int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port);
 bool mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port);
+void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan);
 #else
 static inline struct mlx5_vxlan*
 mlx5_vxlan_create(struct mlx5_core_dev *mdev) { return ERR_PTR(-EOPNOTSUPP); }
@@ -63,6 +64,7 @@ static inline void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan) { return; }
 static inline int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port) { return -EOPNOTSUPP; }
 static inline int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port) { return -EOPNOTSUPP; }
 static inline bool mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port) { return false; }
+static inline void mlx5_vxlan_reset_to_default(struct mlx5_vxlan *vxlan) { return; }
 #endif
 
 #endif /* __MLX5_VXLAN_H__ */
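
mlx5_vxlan_reset_to_default() follows the usual pattern for CONFIG-gated kernel APIs, visible in both branches of the header above: a real prototype when the feature is built in, and a static inline no-op otherwise, so callers never need #ifdefs. A generic, compilable sketch of the pattern (CONFIG_FEATURE_X and the function name are placeholders):

#include <stdio.h>

/* #define CONFIG_FEATURE_X 1 */      /* toggle to select the real code */

#ifdef CONFIG_FEATURE_X
void feature_reset_to_default(void);  /* real implementation elsewhere */
#else
/* stub: compiles away entirely when the feature is disabled */
static inline void feature_reset_to_default(void) { }
#endif

int main(void)
{
    feature_reset_to_default();       /* caller stays #ifdef-free */
    printf("caller did not need any #ifdef\n");
    return 0;
}
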
index a193884..e2c99d9 100644 (file)
@@ -674,14 +674,12 @@ clean_up:
 static int lan743x_dp_write(struct lan743x_adapter *adapter,
                            u32 select, u32 addr, u32 length, u32 *buf)
 {
-       int ret = -EIO;
        u32 dp_sel;
        int i;
 
-       mutex_lock(&adapter->dp_lock);
        if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_,
                                     1, 40, 100, 100))
-               goto unlock;
+               return -EIO;
        dp_sel = lan743x_csr_read(adapter, DP_SEL);
        dp_sel &= ~DP_SEL_MASK_;
        dp_sel |= select;
@@ -693,13 +691,10 @@ static int lan743x_dp_write(struct lan743x_adapter *adapter,
                lan743x_csr_write(adapter, DP_CMD, DP_CMD_WRITE_);
                if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_,
                                             1, 40, 100, 100))
-                       goto unlock;
+                       return -EIO;
        }
-       ret = 0;
 
-unlock:
-       mutex_unlock(&adapter->dp_lock);
-       return ret;
+       return 0;
 }
 
 static u32 lan743x_mac_mii_access(u16 id, u16 index, int read)
@@ -1019,16 +1014,16 @@ static void lan743x_phy_close(struct lan743x_adapter *adapter)
 static int lan743x_phy_open(struct lan743x_adapter *adapter)
 {
        struct lan743x_phy *phy = &adapter->phy;
+       struct phy_device *phydev = NULL;
        struct device_node *phynode;
-       struct phy_device *phydev;
        struct net_device *netdev;
        int ret = -EIO;
 
        netdev = adapter->netdev;
        phynode = of_node_get(adapter->pdev->dev.of_node);
-       adapter->phy_mode = PHY_INTERFACE_MODE_GMII;
 
        if (phynode) {
+               /* try devicetree phy, or fixed link */
                of_get_phy_mode(phynode, &adapter->phy_mode);
 
                if (of_phy_is_fixed_link(phynode)) {
@@ -1044,13 +1039,15 @@ static int lan743x_phy_open(struct lan743x_adapter *adapter)
                                        lan743x_phy_link_status_change, 0,
                                        adapter->phy_mode);
                of_node_put(phynode);
-               if (!phydev)
-                       goto return_error;
-       } else {
+       }
+
+       if (!phydev) {
+               /* try internal phy */
                phydev = phy_find_first(adapter->mdiobus);
                if (!phydev)
                        goto return_error;
 
+               adapter->phy_mode = PHY_INTERFACE_MODE_GMII;
                ret = phy_connect_direct(netdev, phydev,
                                         lan743x_phy_link_status_change,
                                         adapter->phy_mode);
@@ -2733,7 +2730,6 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter,
 
        adapter->intr.irq = adapter->pdev->irq;
        lan743x_csr_write(adapter, INT_EN_CLR, 0xFFFFFFFF);
-       mutex_init(&adapter->dp_lock);
 
        ret = lan743x_gpio_init(adapter);
        if (ret)
index c61a404..a536f4a 100644 (file)
@@ -712,9 +712,6 @@ struct lan743x_adapter {
        struct lan743x_csr      csr;
        struct lan743x_intr     intr;
 
-       /* lock, used to prevent concurrent access to data port */
-       struct mutex            dp_lock;
-
        struct lan743x_gpio     gpio;
        struct lan743x_ptp      ptp;
 
index 7766d73..85d9c3e 100644 (file)
@@ -4163,7 +4163,8 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp,
                opts[1] |= transport_offset << TCPHO_SHIFT;
        } else {
                if (unlikely(skb->len < ETH_ZLEN && rtl_test_hw_pad_bug(tp)))
-                       return !eth_skb_pad(skb);
+                       /* eth_skb_pad would free the skb on error */
+                       return !__skb_put_padto(skb, ETH_ZLEN, false);
        }
 
        return true;
@@ -4342,18 +4343,9 @@ static netdev_features_t rtl8169_features_check(struct sk_buff *skb,
                    rtl_chip_supports_csum_v2(tp))
                        features &= ~NETIF_F_ALL_TSO;
        } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
-               if (skb->len < ETH_ZLEN) {
-                       switch (tp->mac_version) {
-                       case RTL_GIGA_MAC_VER_11:
-                       case RTL_GIGA_MAC_VER_12:
-                       case RTL_GIGA_MAC_VER_17:
-                       case RTL_GIGA_MAC_VER_34:
-                               features &= ~NETIF_F_CSUM_MASK;
-                               break;
-                       default:
-                               break;
-                       }
-               }
+               /* work around hw bug on some chip versions */
+               if (skb->len < ETH_ZLEN)
+                       features &= ~NETIF_F_CSUM_MASK;
 
                if (transport_offset > TCPHO_MAX &&
                    rtl_chip_supports_csum_v2(tp))
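
The r8169 fix swaps eth_skb_pad() for __skb_put_padto(..., false): both pad a short frame to the 60-byte minimum, but the former frees the skb on failure, which would collide with this caller's own error handling. A userspace sketch of the zero-padding itself, with ownership kept by the caller (buffer handling simplified, names invented):

#include <stdio.h>
#include <string.h>

#define ETH_ZLEN 60   /* minimum Ethernet frame length without FCS */

/* Pad buf (capacity cap, current length *len) with zeroes up to
 * ETH_ZLEN. Returns 0 on success, -1 if it does not fit; the caller
 * keeps ownership of the buffer either way (the point of the fix).
 */
static int pad_to_min(unsigned char *buf, size_t cap, size_t *len)
{
    if (*len >= ETH_ZLEN)
        return 0;
    if (cap < ETH_ZLEN)
        return -1;
    memset(buf + *len, 0, ETH_ZLEN - *len);
    *len = ETH_ZLEN;
    return 0;
}

int main(void)
{
    unsigned char frame[128] = "short payload";
    size_t len = 13;

    if (pad_to_min(frame, sizeof(frame), &len) == 0)
        printf("padded to %zu bytes\n", len);
    return 0;
}
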
index fb1db71..575580d 100644 (file)
@@ -551,6 +551,8 @@ static struct phy_driver realtek_drvs[] = {
        {
                PHY_ID_MATCH_EXACT(0x00008201),
                .name           = "RTL8201CP Ethernet",
+               .read_page      = rtl821x_read_page,
+               .write_page     = rtl821x_write_page,
        }, {
                PHY_ID_MATCH_EXACT(0x001cc816),
                .name           = "RTL8201F Fast Ethernet",
index 60c1aad..f2793ff 100644 (file)
@@ -608,8 +608,7 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
        return ret;
 }
 
-static int vrf_finish_direct(struct net *net, struct sock *sk,
-                            struct sk_buff *skb)
+static void vrf_finish_direct(struct sk_buff *skb)
 {
        struct net_device *vrf_dev = skb->dev;
 
@@ -628,7 +627,8 @@ static int vrf_finish_direct(struct net *net, struct sock *sk,
                skb_pull(skb, ETH_HLEN);
        }
 
-       return 1;
+       /* drop the skb's conntrack reference */
+       nf_reset_ct(skb);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -707,15 +707,41 @@ static struct sk_buff *vrf_ip6_out_redirect(struct net_device *vrf_dev,
        return skb;
 }
 
+static int vrf_output6_direct_finish(struct net *net, struct sock *sk,
+                                    struct sk_buff *skb)
+{
+       vrf_finish_direct(skb);
+
+       return vrf_ip6_local_out(net, sk, skb);
+}
+
 static int vrf_output6_direct(struct net *net, struct sock *sk,
                              struct sk_buff *skb)
 {
+       int err = 1;
+
        skb->protocol = htons(ETH_P_IPV6);
 
-       return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
-                           net, sk, skb, NULL, skb->dev,
-                           vrf_finish_direct,
-                           !(IPCB(skb)->flags & IPSKB_REROUTED));
+       if (!(IPCB(skb)->flags & IPSKB_REROUTED))
+               err = nf_hook(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb,
+                             NULL, skb->dev, vrf_output6_direct_finish);
+
+       if (likely(err == 1))
+               vrf_finish_direct(skb);
+
+       return err;
+}
+
+static int vrf_ip6_out_direct_finish(struct net *net, struct sock *sk,
+                                    struct sk_buff *skb)
+{
+       int err;
+
+       err = vrf_output6_direct(net, sk, skb);
+       if (likely(err == 1))
+               err = vrf_ip6_local_out(net, sk, skb);
+
+       return err;
 }
 
 static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev,
@@ -728,18 +754,15 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev,
        skb->dev = vrf_dev;
 
        err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk,
-                     skb, NULL, vrf_dev, vrf_output6_direct);
+                     skb, NULL, vrf_dev, vrf_ip6_out_direct_finish);
 
        if (likely(err == 1))
                err = vrf_output6_direct(net, sk, skb);
 
-       /* reset skb device */
        if (likely(err == 1))
-               nf_reset_ct(skb);
-       else
-               skb = NULL;
+               return skb;
 
-       return skb;
+       return NULL;
 }
 
 static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
@@ -919,15 +942,41 @@ static struct sk_buff *vrf_ip_out_redirect(struct net_device *vrf_dev,
        return skb;
 }
 
+static int vrf_output_direct_finish(struct net *net, struct sock *sk,
+                                   struct sk_buff *skb)
+{
+       vrf_finish_direct(skb);
+
+       return vrf_ip_local_out(net, sk, skb);
+}
+
 static int vrf_output_direct(struct net *net, struct sock *sk,
                             struct sk_buff *skb)
 {
+       int err = 1;
+
        skb->protocol = htons(ETH_P_IP);
 
-       return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
-                           net, sk, skb, NULL, skb->dev,
-                           vrf_finish_direct,
-                           !(IPCB(skb)->flags & IPSKB_REROUTED));
+       if (!(IPCB(skb)->flags & IPSKB_REROUTED))
+               err = nf_hook(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb,
+                             NULL, skb->dev, vrf_output_direct_finish);
+
+       if (likely(err == 1))
+               vrf_finish_direct(skb);
+
+       return err;
+}
+
+static int vrf_ip_out_direct_finish(struct net *net, struct sock *sk,
+                                   struct sk_buff *skb)
+{
+       int err;
+
+       err = vrf_output_direct(net, sk, skb);
+       if (likely(err == 1))
+               err = vrf_ip_local_out(net, sk, skb);
+
+       return err;
 }
 
 static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev,
@@ -940,18 +989,15 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev,
        skb->dev = vrf_dev;
 
        err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
-                     skb, NULL, vrf_dev, vrf_output_direct);
+                     skb, NULL, vrf_dev, vrf_ip_out_direct_finish);
 
        if (likely(err == 1))
                err = vrf_output_direct(net, sk, skb);
 
-       /* reset skb device */
        if (likely(err == 1))
-               nf_reset_ct(skb);
-       else
-               skb = NULL;
+               return skb;
 
-       return skb;
+       return NULL;
 }
 
 static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
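
The vrf rework hinges on nf_hook()'s return convention: 1 means the verdict was accepted synchronously and the caller must finish the work itself, while any other value means the hook consumed or queued the packet (e.g. an async verdict) and will invoke the continuation later. A toy model of that calling convention (toy_nf_hook, deliver and the queueing flag are invented):

#include <stdio.h>

struct pkt { int id; };

static void deliver(struct pkt *p)
{
    printf("pkt %d delivered\n", p->id);
}

/* Returns 1: caller continues synchronously.
 * Returns 0: packet queued; the hook calls the continuation later.
 */
static int toy_nf_hook(struct pkt *p, int queue_it, void (*okfn)(struct pkt *))
{
    if (queue_it) {
        /* pretend an async verdict fires immediately */
        okfn(p);
        return 0;
    }
    return 1;
}

int main(void)
{
    struct pkt a = { 1 }, b = { 2 };

    if (toy_nf_hook(&a, 0, deliver) == 1)
        deliver(&a);             /* sync path: caller finishes the job */

    toy_nf_hook(&b, 1, deliver); /* async path: hook owns the packet */
    return 0;
}

This is exactly why the patch replaces NF_HOOK_COND with an explicit nf_hook() plus "if (likely(err == 1)) vrf_finish_direct(skb);": the finish step must also run on the deferred path, via the new *_finish callbacks.
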
index f8aed06..2369ca2 100644 (file)
@@ -889,6 +889,7 @@ static ssize_t cosa_write(struct file *file,
                        chan->tx_status = 1;
                        spin_unlock_irqrestore(&cosa->lock, flags);
                        up(&chan->wsem);
+                       kfree(kbuf);
                        return -ERESTARTSYS;
                }
        }
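
The cosa fix is the classic "every exit path must release what the function allocated" rule: the -ERESTARTSYS return leaked the kernel copy of the user data. In miniature (names hypothetical; ERESTARTSYS is kernel-internal, so it is defined locally here):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#define ERESTARTSYS 512   /* kernel-internal errno, defined for the toy */

static int toy_write(const char *data, size_t count, int interrupted)
{
    char *kbuf = malloc(count);

    if (!kbuf)
        return -ENOMEM;
    memcpy(kbuf, data, count);

    if (interrupted) {
        free(kbuf);              /* the fix: release before bailing out */
        return -ERESTARTSYS;
    }

    /* ... hand kbuf to the hardware, which frees it when done ... */
    free(kbuf);                  /* simplified: freed here in the toy */
    return (int)count;
}

int main(void)
{
    printf("%d\n", toy_write("hello", 5, 1));
    return 0;
}
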
index d1e3c68..5deb370 100644 (file)
 #else
 #define __diag_GCC_8(s)
 #endif
-
-#define __no_fgcse __attribute__((optimize("-fno-gcse")))
index 6e390d5..ac3fa37 100644 (file)
@@ -247,10 +247,6 @@ struct ftrace_likely_data {
 #define asm_inline asm
 #endif
 
-#ifndef __no_fgcse
-# define __no_fgcse
-#endif
-
 /* Are two types/vars the same type (ignoring qualifiers)? */
 #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
 
index 72d62cb..1b62397 100644 (file)
@@ -558,21 +558,21 @@ struct sk_filter {
 DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
 
 #define __BPF_PROG_RUN(prog, ctx, dfunc)       ({                      \
-       u32 ret;                                                        \
+       u32 __ret;                                                      \
        cant_migrate();                                                 \
        if (static_branch_unlikely(&bpf_stats_enabled_key)) {           \
-               struct bpf_prog_stats *stats;                           \
-               u64 start = sched_clock();                              \
-               ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func);     \
-               stats = this_cpu_ptr(prog->aux->stats);                 \
-               u64_stats_update_begin(&stats->syncp);                  \
-               stats->cnt++;                                           \
-               stats->nsecs += sched_clock() - start;                  \
-               u64_stats_update_end(&stats->syncp);                    \
+               struct bpf_prog_stats *__stats;                         \
+               u64 __start = sched_clock();                            \
+               __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func);   \
+               __stats = this_cpu_ptr(prog->aux->stats);               \
+               u64_stats_update_begin(&__stats->syncp);                \
+               __stats->cnt++;                                         \
+               __stats->nsecs += sched_clock() - __start;              \
+               u64_stats_update_end(&__stats->syncp);                  \
        } else {                                                        \
-               ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func);     \
+               __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func);   \
        }                                                               \
-       ret; })
+       __ret; })
 
 #define BPF_PROG_RUN(prog, ctx)                                                \
        __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func)
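
Renaming the macro locals to __ret, __stats and __start avoids variable shadowing: a statement-expression macro that declares a plain `ret` silently captures a caller variable of the same name. A self-contained demonstration of the hazard and the __-prefix convention (toy macros, not the kernel one; statement expressions are a GCC/Clang extension, as in the kernel):

#include <stdio.h>

/* BROKEN: if the caller also has a variable named 'ret', the inner
 * declaration shadows it and the macro reads/writes the wrong one.
 */
#define RUN_BAD(x)  ({ int ret = (x) * 2; ret; })

/* Conventional fix: reserve the __ prefix for macro-local names. */
#define RUN_GOOD(x) ({ int __ret = (x) * 2; __ret; })

int main(void)
{
    long ret = 7;

    /* RUN_BAD(ret) would expand to 'int ret = (ret) * 2;', where the
     * initializer reads the freshly declared, shadowing 'ret' rather
     * than the caller's long. With the __ prefix there is no collision.
     */
    printf("%d\n", RUN_GOOD(ret));   /* prints 14 */
    return 0;
}
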
index 0140d08..01755b8 100644 (file)
@@ -86,7 +86,7 @@ int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
 void xp_destroy(struct xsk_buff_pool *pool);
 void xp_release(struct xdp_buff_xsk *xskb);
 void xp_get_pool(struct xsk_buff_pool *pool);
-void xp_put_pool(struct xsk_buff_pool *pool);
+bool xp_put_pool(struct xsk_buff_pool *pool);
 void xp_clear_dev(struct xsk_buff_pool *pool);
 void xp_add_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs);
 void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs);
index bdc8cd1..c1b9f71 100644 (file)
@@ -1,6 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-y := core.o
-CFLAGS_core.o += $(call cc-disable-warning, override-init)
+ifneq ($(CONFIG_BPF_JIT_ALWAYS_ON),y)
+# ___bpf_prog_run() needs GCSE disabled on x86; see 3193c0836f203 for details
+cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
+endif
+CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
 
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
index 78ea8a7..56cc5a9 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/bpf_verifier.h>
 #include <net/bpf_sk_storage.h>
 #include <linux/bpf_local_storage.h>
+#include <linux/btf_ids.h>
 
 /* For every LSM hook that allows attachment of BPF programs, declare a nop
  * function where a BPF program can be attached.
@@ -26,7 +27,11 @@ noinline RET bpf_lsm_##NAME(__VA_ARGS__)     \
 #include <linux/lsm_hook_defs.h>
 #undef LSM_HOOK
 
-#define BPF_LSM_SYM_PREFX  "bpf_lsm_"
+#define LSM_HOOK(RET, DEFAULT, NAME, ...) BTF_ID(func, bpf_lsm_##NAME)
+BTF_SET_START(bpf_lsm_hooks)
+#include <linux/lsm_hook_defs.h>
+#undef LSM_HOOK
+BTF_SET_END(bpf_lsm_hooks)
 
 int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
                        const struct bpf_prog *prog)
@@ -37,8 +42,7 @@ int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
                return -EINVAL;
        }
 
-       if (strncmp(BPF_LSM_SYM_PREFX, prog->aux->attach_func_name,
-                   sizeof(BPF_LSM_SYM_PREFX) - 1)) {
+       if (!btf_id_set_contains(&bpf_lsm_hooks, prog->aux->attach_btf_id)) {
                bpf_log(vlog, "attach_btf_id %u points to wrong type name %s\n",
                        prog->aux->attach_btf_id, prog->aux->attach_func_name);
                return -EINVAL;
index 9268d77..55454d2 100644 (file)
@@ -1369,7 +1369,7 @@ u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
  *
  * Decode and execute eBPF instructions.
  */
-static u64 __no_fgcse ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
+static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
 {
 #define BPF_INSN_2_LBL(x, y)    [BPF_##x | BPF_##y] = &&x##_##y
 #define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
index 1815e97..1fccba6 100644 (file)
@@ -821,6 +821,32 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
        }
 }
 
+static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
+                           void *value, bool onallcpus)
+{
+       /* When using prealloc and not setting the initial value on all cpus,
+        * zero-fill element values for other cpus (just as happens when not
+        * using prealloc). Otherwise, the bpf program has no way to ensure
+        * known initial values for cpus other than the current one
+        * (onallcpus is always false when coming from a bpf prog).
+        */
+       if (htab_is_prealloc(htab) && !onallcpus) {
+               u32 size = round_up(htab->map.value_size, 8);
+               int current_cpu = raw_smp_processor_id();
+               int cpu;
+
+               for_each_possible_cpu(cpu) {
+                       if (cpu == current_cpu)
+                               bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value,
+                                               size);
+                       else
+                               memset(per_cpu_ptr(pptr, cpu), 0, size);
+               }
+       } else {
+               pcpu_copy_value(htab, pptr, value, onallcpus);
+       }
+}
+
 static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
 {
        return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS &&
@@ -891,7 +917,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
                        }
                }
 
-               pcpu_copy_value(htab, pptr, value, onallcpus);
+               pcpu_init_value(htab, pptr, value, onallcpus);
 
                if (!prealloc)
                        htab_elem_set_ptr(l_new, key_size, pptr);
@@ -1183,7 +1209,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
                pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
                                value, onallcpus);
        } else {
-               pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
+               pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
                                value, onallcpus);
                hlist_nulls_add_head_rcu(&l_new->hash_node, head);
                l_new = NULL;
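
pcpu_init_value() closes a stale-data hole: with preallocated per-cpu elements, the slots for other CPUs still hold whatever the previous user of the element left behind. The fix copies the value to the current CPU and zero-fills the rest, matching non-prealloc behaviour. Modeled in userspace, where the per-cpu area is just an array (sizes and names invented):

#include <stdio.h>
#include <string.h>

#define NR_CPUS    4
#define VALUE_SIZE 8

/* Toy per-cpu element: one VALUE_SIZE slot per possible CPU. */
static unsigned char pptr[NR_CPUS][VALUE_SIZE];

static void pcpu_init_value(const void *value, int current_cpu)
{
    for (int cpu = 0; cpu < NR_CPUS; cpu++) {
        if (cpu == current_cpu)
            memcpy(pptr[cpu], value, VALUE_SIZE);  /* caller's value */
        else
            memset(pptr[cpu], 0, VALUE_SIZE);      /* no stale data */
    }
}

int main(void)
{
    unsigned char v[VALUE_SIZE] = { 0xab };

    memset(pptr, 0xee, sizeof(pptr));   /* pretend: a reused element */
    pcpu_init_value(v, 1);

    for (int cpu = 0; cpu < NR_CPUS; cpu++)
        printf("cpu%d first byte: %#x\n", cpu, pptr[cpu][0]);
    return 0;
}
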
index ace4911..26bced2 100644 (file)
@@ -6,6 +6,7 @@ config USERMODE_DRIVER
 menuconfig BPF_PRELOAD
        bool "Preload BPF file system with kernel specific program and map iterators"
        depends on BPF
+       depends on BPF_SYSCALL
        # The dependency on !COMPILE_TEST prevents it from being enabled
        # in allmodconfig or allyesconfig configurations
        depends on !COMPILE_TEST
index a932d95..ab4b136 100644 (file)
@@ -8254,8 +8254,6 @@ static int __devlink_port_attrs_set(struct devlink_port *devlink_port,
 {
        struct devlink_port_attrs *attrs = &devlink_port->attrs;
 
-       if (WARN_ON(devlink_port->registered))
-               return -EEXIST;
        devlink_port->attrs_set = true;
        attrs->flavour = flavour;
        if (attrs->switch_id.id_len) {
@@ -8279,6 +8277,8 @@ void devlink_port_attrs_set(struct devlink_port *devlink_port,
 {
        int ret;
 
+       if (WARN_ON(devlink_port->registered))
+               return;
        devlink_port->attrs = *attrs;
        ret = __devlink_port_attrs_set(devlink_port, attrs->flavour);
        if (ret)
@@ -8301,6 +8301,8 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 contro
        struct devlink_port_attrs *attrs = &devlink_port->attrs;
        int ret;
 
+       if (WARN_ON(devlink_port->registered))
+               return;
        ret = __devlink_port_attrs_set(devlink_port,
                                       DEVLINK_PORT_FLAVOUR_PCI_PF);
        if (ret)
@@ -8326,6 +8328,8 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro
        struct devlink_port_attrs *attrs = &devlink_port->attrs;
        int ret;
 
+       if (WARN_ON(devlink_port->registered))
+               return;
        ret = __devlink_port_attrs_set(devlink_port,
                                       DEVLINK_PORT_FLAVOUR_PCI_VF);
        if (ret)
index 8ee4cdb..1c9f4df 100644 (file)
@@ -280,7 +280,7 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info)
                                          active_diff_mask, compact);
        }
        if (mod)
-               ethtool_notify(dev, ETHTOOL_MSG_FEATURES_NTF, NULL);
+               netdev_features_change(dev);
 
 out_rtnl:
        rtnl_unlock();
index 25f1caf..e25be2d 100644 (file)
@@ -263,7 +263,7 @@ static int iptunnel_pmtud_check_icmp(struct sk_buff *skb, int mtu)
        const struct icmphdr *icmph = icmp_hdr(skb);
        const struct iphdr *iph = ip_hdr(skb);
 
-       if (mtu <= 576 || iph->frag_off != htons(IP_DF))
+       if (mtu < 576 || iph->frag_off != htons(IP_DF))
                return 0;
 
        if (ipv4_is_lbcast(iph->daddr)  || ipv4_is_multicast(iph->daddr) ||
@@ -359,7 +359,7 @@ static int iptunnel_pmtud_check_icmpv6(struct sk_buff *skb, int mtu)
        __be16 frag_off;
        int offset;
 
-       if (mtu <= IPV6_MIN_MTU)
+       if (mtu < IPV6_MIN_MTU)
                return 0;
 
        if (stype == IPV6_ADDR_ANY || stype == IPV6_ADDR_MULTICAST ||
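
Both hunks fix the same off-by-one: 576 and IPV6_MIN_MTU (1280) are themselves legal MTUs, so a reply advertising exactly the minimum must not be suppressed; only values strictly below it are bogus. The boundary in isolation (the IPV4_MIN_PMTU name is this sketch's own; 576 and 1280 come from RFC 791 / RFC 8200):

#include <stdio.h>
#include <stdbool.h>

#define IPV4_MIN_PMTU 576
#define IPV6_MIN_MTU  1280

/* Reject only MTUs strictly below the protocol minimum. */
static bool mtu_reply_allowed(int mtu, int min_mtu)
{
    return mtu >= min_mtu;      /* i.e. !(mtu < min_mtu) */
}

int main(void)
{
    /* 576 itself is valid for IPv4: the old `mtu <= 576` wrongly
     * dropped it; the fixed `mtu < 576` keeps it. */
    printf("IPv4 mtu 576:  %s\n",
           mtu_reply_allowed(576, IPV4_MIN_PMTU) ? "allowed" : "dropped");
    printf("IPv6 mtu 1280: %s\n",
           mtu_reply_allowed(1280, IPV6_MIN_MTU) ? "allowed" : "dropped");
    return 0;
}
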
index 6ac473b..00dc3f9 100644 (file)
@@ -331,7 +331,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
        __u32 cookie = ntohl(th->ack_seq) - 1;
        struct sock *ret = sk;
        struct request_sock *req;
-       int mss;
+       int full_space, mss;
        struct rtable *rt;
        __u8 rcv_wscale;
        struct flowi4 fl4;
@@ -427,8 +427,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 
        /* Try to redo what tcp_v4_send_synack did. */
        req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
+       /* limit the window selection if the user enforces a smaller rx buffer */
+       full_space = tcp_full_space(sk);
+       if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+           (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
+               req->rsk_window_clamp = full_space;
 
-       tcp_select_initial_window(sk, tcp_full_space(sk), req->mss,
+       tcp_select_initial_window(sk, full_space, req->mss,
                                  &req->rsk_rcv_wnd, &req->rsk_window_clamp,
                                  ireq->wscale_ok, &rcv_wscale,
                                  dst_metric(&rt->dst, RTAX_INITRWND));
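
The syncookie change applies the same rule the IPv6 hunk below repeats: if the application locked the receive buffer (SO_RCVBUF), the advertised window clamp must not exceed what that buffer can actually back, and a zero clamp (meaning "not set yet") gets the buffer-derived bound too. The clamping rule in isolation (full_space stands in for tcp_full_space(sk); names invented):

#include <stdio.h>
#include <stdbool.h>

static unsigned int clamp_window(unsigned int window_clamp,
                                 unsigned int full_space,
                                 bool rcvbuf_locked)
{
    /* limit the window selection if the user enforces a smaller
     * rx buffer; a zero clamp means "no clamp yet", so it is also
     * replaced by the buffer-derived bound.
     */
    if (rcvbuf_locked && (window_clamp > full_space || window_clamp == 0))
        window_clamp = full_space;
    return window_clamp;
}

int main(void)
{
    printf("%u\n", clamp_window(0, 65535, true));        /* 65535 */
    printf("%u\n", clamp_window(262144, 65535, true));   /* 65535 */
    printf("%u\n", clamp_window(262144, 65535, false));  /* 262144 */
    return 0;
}
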
index e67a66f..c62805c 100644 (file)
@@ -366,7 +366,7 @@ out:
 static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
                                               struct sk_buff *skb)
 {
-       struct udphdr *uh = udp_hdr(skb);
+       struct udphdr *uh = udp_gro_udphdr(skb);
        struct sk_buff *pp = NULL;
        struct udphdr *uh2;
        struct sk_buff *p;
@@ -500,12 +500,22 @@ out:
 }
 EXPORT_SYMBOL(udp_gro_receive);
 
+static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
+                                       __be16 dport)
+{
+       const struct iphdr *iph = skb_gro_network_header(skb);
+
+       return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
+                                iph->daddr, dport, inet_iif(skb),
+                                inet_sdif(skb), &udp_table, NULL);
+}
+
 INDIRECT_CALLABLE_SCOPE
 struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
        struct udphdr *uh = udp_gro_udphdr(skb);
+       struct sock *sk = NULL;
        struct sk_buff *pp;
-       struct sock *sk;
 
        if (unlikely(!uh))
                goto flush;
@@ -523,7 +533,10 @@ struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
 skip:
        NAPI_GRO_CB(skb)->is_ipv6 = 0;
        rcu_read_lock();
-       sk = static_branch_unlikely(&udp_encap_needed_key) ? udp4_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
+
+       if (static_branch_unlikely(&udp_encap_needed_key))
+               sk = udp4_gro_lookup_skb(skb, uh->source, uh->dest);
+
        pp = udp_gro_receive(head, skb, uh, sk);
        rcu_read_unlock();
        return pp;
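
The two UDP GRO fixes share one root cause: on the fast/frag0 path the headers can still sit in the first page fragment, where skb->data does not point at them, so skb->data-based helpers like udp_hdr() read the wrong bytes; only accessors that go through the recorded GRO offsets are safe here. A toy model of why the two accessors diverge (the struct fields are invented to mimic the idea, not the real struct sk_buff):

#include <stdio.h>
#include <string.h>

/* Toy packet: headers live in frag0, not in the linear area. */
struct toy_skb {
    unsigned char *data;          /* linear area (payload only here) */
    unsigned char *frag0;         /* where GRO actually finds headers */
    unsigned int network_offset;  /* header offset within frag0 */
};

/* Wrong under frag0: assumes headers are in the linear area. */
static unsigned char *hdr_via_data(struct toy_skb *skb)
{
    return skb->data + skb->network_offset;
}

/* GRO-style accessor: uses the buffer the offsets were recorded for. */
static unsigned char *hdr_via_gro(struct toy_skb *skb)
{
    return skb->frag0 + skb->network_offset;
}

int main(void)
{
    unsigned char frag0[64], linear[64];
    struct toy_skb skb = { linear, frag0, 14 };

    memcpy(frag0 + 14, "IPHDR", 5);     /* headers really live here */
    memcpy(linear + 14, "JUNK!", 5);

    printf("via data: %.5s\n", (char *)hdr_via_data(&skb));  /* JUNK! */
    printf("via gro:  %.5s\n", (char *)hdr_via_gro(&skb));   /* IPHDR */
    return 0;
}
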
index 5e2c34c..5e7983c 100644 (file)
@@ -1128,7 +1128,6 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
        if (tdev && !netif_is_l3_master(tdev)) {
                int t_hlen = tunnel->hlen + sizeof(struct iphdr);
 
-               dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
                dev->mtu = tdev->mtu - t_hlen;
                if (dev->mtu < IPV6_MIN_MTU)
                        dev->mtu = IPV6_MIN_MTU;
@@ -1426,7 +1425,6 @@ static void ipip6_tunnel_setup(struct net_device *dev)
        dev->priv_destructor    = ipip6_dev_free;
 
        dev->type               = ARPHRD_SIT;
-       dev->hard_header_len    = LL_MAX_HEADER + t_hlen;
        dev->mtu                = ETH_DATA_LEN - t_hlen;
        dev->min_mtu            = IPV6_MIN_MTU;
        dev->max_mtu            = IP6_MAX_MTU - t_hlen;
index e796a64..9b6cae1 100644 (file)
@@ -136,7 +136,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
        __u32 cookie = ntohl(th->ack_seq) - 1;
        struct sock *ret = sk;
        struct request_sock *req;
-       int mss;
+       int full_space, mss;
        struct dst_entry *dst;
        __u8 rcv_wscale;
        u32 tsoff = 0;
@@ -241,7 +241,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
        }
 
        req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
-       tcp_select_initial_window(sk, tcp_full_space(sk), req->mss,
+       /* limit the window selection if the user enforces a smaller rx buffer */
+       full_space = tcp_full_space(sk);
+       if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+           (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
+               req->rsk_window_clamp = full_space;
+
+       tcp_select_initial_window(sk, full_space, req->mss,
                                  &req->rsk_rcv_wnd, &req->rsk_window_clamp,
                                  ireq->wscale_ok, &rcv_wscale,
                                  dst_metric(dst, RTAX_INITRWND));
index 584157a..f9e888d 100644 (file)
@@ -111,12 +111,22 @@ out:
        return segs;
 }
 
+static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
+                                       __be16 dport)
+{
+       const struct ipv6hdr *iph = skb_gro_network_header(skb);
+
+       return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
+                                &iph->daddr, dport, inet6_iif(skb),
+                                inet6_sdif(skb), &udp_table, NULL);
+}
+
 INDIRECT_CALLABLE_SCOPE
 struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
        struct udphdr *uh = udp_gro_udphdr(skb);
+       struct sock *sk = NULL;
        struct sk_buff *pp;
-       struct sock *sk;
 
        if (unlikely(!uh))
                goto flush;
@@ -135,7 +145,10 @@ struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
 skip:
        NAPI_GRO_CB(skb)->is_ipv6 = 1;
        rcu_read_lock();
-       sk = static_branch_unlikely(&udpv6_encap_needed_key) ? udp6_lib_lookup_skb(skb, uh->source, uh->dest) : NULL;
+
+       if (static_branch_unlikely(&udpv6_encap_needed_key))
+               sk = udp6_gro_lookup_skb(skb, uh->source, uh->dest);
+
        pp = udp_gro_receive(head, skb, uh, sk);
        rcu_read_unlock();
        return pp;
index d805720..047238f 100644 (file)
@@ -1434,7 +1434,8 @@ static int iucv_sock_shutdown(struct socket *sock, int how)
                break;
        }
 
-       if (how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) {
+       if ((how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) &&
+           sk->sk_state == IUCV_CONNECTED) {
                if (iucv->transport == AF_IUCV_TRANS_IUCV) {
                        txmsg.class = 0;
                        txmsg.tag = 0;
index e7419fd..88f2a7a 100644 (file)
@@ -2467,6 +2467,7 @@ static struct proto mptcp_prot = {
        .memory_pressure        = &tcp_memory_pressure,
        .stream_memory_free     = mptcp_memory_free,
        .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
+       .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
        .sysctl_mem     = sysctl_tcp_mem,
        .obj_size       = sizeof(struct mptcp_sock),
        .slab_flags     = SLAB_TYPESAFE_BY_RCU,
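
With sysctl_rmem_offset populated, generic socket code can resolve the per-netns receive-buffer limits through an offset into struct net instead of dereferencing a protocol-static sysctl_rmem array that mptcp never supplied. A userspace model of that offset-based lookup, with made-up defaults:

	#include <stddef.h>
	#include <stdio.h>

	struct netns_ipv4 {
		int sysctl_tcp_rmem[3];
	};

	struct net {
		struct netns_ipv4 ipv4;
	};

	/* mirrors a sysctl_rmem_offset consumer: an offset into struct net
	 * selects the live, per-namespace limits */
	static int rmem0(const struct net *net, size_t rmem_offset)
	{
		return *(const int *)((const char *)net + rmem_offset);
	}

	int main(void)
	{
		struct net n = { .ipv4.sysctl_tcp_rmem = { 4096, 131072, 6291456 } };

		printf("rmem[0] = %d\n",
		       rmem0(&n, offsetof(struct net, ipv4.sysctl_tcp_rmem)));
		return 0;
	}
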
index 2e8e3f7..fc55c91 100644 (file)
@@ -1166,12 +1166,13 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
        struct netlbl_unlhsh_walk_arg cb_arg;
        u32 skip_bkt = cb->args[0];
        u32 skip_chain = cb->args[1];
-       u32 iter_bkt;
-       u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0;
+       u32 skip_addr4 = cb->args[2];
+       u32 iter_bkt, iter_chain, iter_addr4 = 0, iter_addr6 = 0;
        struct netlbl_unlhsh_iface *iface;
        struct list_head *iter_list;
        struct netlbl_af4list *addr4;
 #if IS_ENABLED(CONFIG_IPV6)
+       u32 skip_addr6 = cb->args[3];
        struct netlbl_af6list *addr6;
 #endif
 
@@ -1182,7 +1183,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
        rcu_read_lock();
        for (iter_bkt = skip_bkt;
             iter_bkt < rcu_dereference(netlbl_unlhsh)->size;
-            iter_bkt++, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0) {
+            iter_bkt++) {
                iter_list = &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt];
                list_for_each_entry_rcu(iface, iter_list, list) {
                        if (!iface->valid ||
@@ -1190,7 +1191,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
                                continue;
                        netlbl_af4list_foreach_rcu(addr4,
                                                   &iface->addr4_list) {
-                               if (iter_addr4++ < cb->args[2])
+                               if (iter_addr4++ < skip_addr4)
                                        continue;
                                if (netlbl_unlabel_staticlist_gen(
                                              NLBL_UNLABEL_C_STATICLIST,
@@ -1203,10 +1204,12 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
                                        goto unlabel_staticlist_return;
                                }
                        }
+                       iter_addr4 = 0;
+                       skip_addr4 = 0;
 #if IS_ENABLED(CONFIG_IPV6)
                        netlbl_af6list_foreach_rcu(addr6,
                                                   &iface->addr6_list) {
-                               if (iter_addr6++ < cb->args[3])
+                               if (iter_addr6++ < skip_addr6)
                                        continue;
                                if (netlbl_unlabel_staticlist_gen(
                                              NLBL_UNLABEL_C_STATICLIST,
@@ -1219,8 +1222,12 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
                                        goto unlabel_staticlist_return;
                                }
                        }
+                       iter_addr6 = 0;
+                       skip_addr6 = 0;
 #endif /* IPv6 */
                }
+               iter_chain = 0;
+               skip_chain = 0;
        }
 
 unlabel_staticlist_return:
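
The reshuffled loop keeps the dump resumable without over-skipping: the skip values from cb->args[] apply only until their level has been walked once, then both the iterator and the skip value reset to zero so every later bucket and chain starts from its first entry. The skip-then-reset pattern, runnable in miniature:

	#include <stdio.h>

	/* emit up to 'budget' items, then report where to resume */
	static int dump(int nbkt, int nitem, int skip_bkt, int skip_item,
			int budget)
	{
		int emitted = 0;

		for (int b = skip_bkt; b < nbkt; b++) {
			for (int i = skip_item; i < nitem; i++) {
				if (emitted++ == budget) {
					printf("resume at bkt=%d item=%d\n", b, i);
					return 0;
				}
				printf("emit %d/%d\n", b, i);
			}
			skip_item = 0;	/* the crux: reset once the level is passed */
		}
		return 1;	/* done */
	}

	int main(void)
	{
		dump(3, 2, 1, 1, 2);	/* resumes without skipping later buckets */
		return 0;
	}
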
index 5f6f860..13f3143 100644 (file)
@@ -664,12 +664,18 @@ static int tipc_topsrv_start(struct net *net)
 
        ret = tipc_topsrv_work_start(srv);
        if (ret < 0)
-               return ret;
+               goto err_start;
 
        ret = tipc_topsrv_create_listener(srv);
        if (ret < 0)
-               tipc_topsrv_work_stop(srv);
+               goto err_create;
 
+       return 0;
+
+err_create:
+       tipc_topsrv_work_stop(srv);
+err_start:
+       kfree(srv);
        return ret;
 }
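
The rewrite converts the early returns into the usual unwind ladder: each failure label undoes exactly what succeeded before it, and kfree(srv) now runs on both failure paths instead of the allocation leaking. The shape in miniature, as a standalone program:

	#include <stdio.h>
	#include <stdlib.h>

	struct srv { int started; };

	static int work_start(struct srv *s) { s->started = 1; return 0; }
	static void work_stop(struct srv *s) { s->started = 0; }
	static int create_listener(struct srv *s) { (void)s; return -1; /* simulated failure */ }

	static int start(void)
	{
		struct srv *srv = calloc(1, sizeof(*srv));
		int ret;

		if (!srv)
			return -1;

		ret = work_start(srv);
		if (ret < 0)
			goto err_start;

		ret = create_listener(srv);
		if (ret < 0)
			goto err_create;

		return 0;

	err_create:
		work_stop(srv);
	err_start:
		free(srv);
		return ret;
	}

	int main(void)
	{
		printf("start() = %d\n", start());	/* -1, fully unwound */
		return 0;
	}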
 
index 0bbb283..046d3fe 100644 (file)
@@ -825,7 +825,7 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr,
        sock->state = SS_CONNECTED;
        rc = 0;
 out_put_neigh:
-       if (rc) {
+       if (rc && x25->neighbour) {
                read_lock_bh(&x25_list_lock);
                x25_neigh_put(x25->neighbour);
                x25->neighbour = NULL;
index b71a32e..cfbec39 100644 (file)
@@ -1146,7 +1146,8 @@ static void xsk_destruct(struct sock *sk)
        if (!sock_flag(sk, SOCK_DEAD))
                return;
 
-       xp_put_pool(xs->pool);
+       if (!xp_put_pool(xs->pool))
+               xdp_put_umem(xs->umem);
 
        sk_refcnt_debug_dec(sk);
 }
index 64c9e55..8a3bf4e 100644 (file)
@@ -251,15 +251,18 @@ void xp_get_pool(struct xsk_buff_pool *pool)
        refcount_inc(&pool->users);
 }
 
-void xp_put_pool(struct xsk_buff_pool *pool)
+bool xp_put_pool(struct xsk_buff_pool *pool)
 {
        if (!pool)
-               return;
+               return false;
 
        if (refcount_dec_and_test(&pool->users)) {
                INIT_WORK(&pool->work, xp_release_deferred);
                schedule_work(&pool->work);
+               return true;
        }
+
+       return false;
 }
 
 static struct xsk_dma_map *xp_find_dma_map(struct xsk_buff_pool *pool)
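
xp_put_pool() now reports whether it released anything: true means the last reference is gone and the deferred pool teardown has been scheduled, while false covers both a surviving reference and the no-pool case. The xsk_destruct() hunk above leans on the false case to fall back to xdp_put_umem(), so the umem is dropped exactly once either way. The contract in miniature:

	#include <stdbool.h>
	#include <stdio.h>

	struct pool { int users; };

	static bool put_pool(struct pool *p)
	{
		if (!p)
			return false;
		if (--p->users == 0) {
			printf("pool teardown scheduled\n");
			return true;
		}
		return false;
	}

	int main(void)
	{
		struct pool p = { .users = 1 };

		if (!put_pool(&p))	/* last ref: pool path owns cleanup */
			printf("caller drops umem\n");

		if (!put_pool(NULL))	/* socket never got a pool */
			printf("caller drops umem\n");
		return 0;
	}
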
index 4a74531..b68bd2f 100644 (file)
@@ -290,7 +290,7 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return)
 
 int main(int argc, char **argv)
 {
-       struct rlimit r = {1024*1024, RLIM_INFINITY};
+       struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
        extern char __executable_start;
        char filename[256], buf[256];
        __u64 uprobe_file_offset;
index 3e36b3e..3d6eab7 100644 (file)
@@ -116,7 +116,7 @@ static void int_exit(int sig)
 
 int main(int ac, char **argv)
 {
-       struct rlimit r = {1024*1024, RLIM_INFINITY};
+       struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
        long key, next_key, value;
        struct bpf_link *links[2];
        struct bpf_program *prog;
index 70e9877..83e0fec 100644 (file)
@@ -107,7 +107,7 @@ static void print_hist(int fd)
 
 int main(int ac, char **argv)
 {
-       struct rlimit r = {1024*1024, RLIM_INFINITY};
+       struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
        struct bpf_link *links[2];
        struct bpf_program *prog;
        struct bpf_object *obj;
index 6fb8dbd..f78cb18 100644 (file)
@@ -765,7 +765,7 @@ static int load_cpumap_prog(char *file_name, char *prog_name,
 
 int main(int argc, char **argv)
 {
-       struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+       struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
        char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs";
        char *mprog_filename = "xdp_redirect_kern.o";
        char *redir_interface = NULL, *redir_map = NULL;
index caa4e7f..93fa1bc 100644 (file)
@@ -450,7 +450,7 @@ static void stats_poll(int interval, int action, __u32 cfg_opt)
 int main(int argc, char **argv)
 {
        __u32 cfg_options = NO_TOUCH; /* Default: Don't touch packet memory */
-       struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+       struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
        struct bpf_prog_load_attr prog_load_attr = {
                .prog_type      = BPF_PROG_TYPE_XDP,
        };
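
The same one-line change lands in five samples (this hunk and the four above): the RLIMIT_MEMLOCK soft limit goes from 1 MiB straight to unlimited, because kernels of this era charge BPF maps and programs against locked memory and the old cap made larger samples fail to load. What the call boils down to, standalone (raising the limit needs root or CAP_SYS_RESOURCE):

	#include <stdio.h>
	#include <sys/resource.h>

	int main(void)
	{
		struct rlimit r = { RLIM_INFINITY, RLIM_INFINITY };

		/* same call the samples issue before loading BPF objects */
		if (setrlimit(RLIMIT_MEMLOCK, &r)) {
			perror("setrlimit(RLIMIT_MEMLOCK)");
			return 1;
		}
		printf("RLIMIT_MEMLOCK lifted\n");
		return 0;
	}
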
index 6769caa..3148437 100755 (executable)
@@ -408,6 +408,7 @@ class PrinterHelpers(Printer):
             'struct bpf_perf_event_data',
             'struct bpf_perf_event_value',
             'struct bpf_pidns_info',
+            'struct bpf_redir_neigh',
             'struct bpf_sock',
             'struct bpf_sock_addr',
             'struct bpf_sock_ops',
index a43a6f1..359960a 100644 (file)
@@ -843,9 +843,14 @@ static int handle_perms(void)
                else
                        p_err("missing %s%s%s%s%s%s%s%srequired for full feature probing; run as root or use 'unprivileged'",
                              capability_msg(bpf_caps, 0),
+#ifdef CAP_BPF
                              capability_msg(bpf_caps, 1),
                              capability_msg(bpf_caps, 2),
-                             capability_msg(bpf_caps, 3));
+                             capability_msg(bpf_caps, 3)
+#else
+                             "", "", "", "", "", ""
+#endif /* CAP_BPF */
+                             );
                goto exit_free;
        }
 
index d942c1e..acdb2c2 100644 (file)
@@ -940,7 +940,7 @@ static int parse_attach_detach_args(int argc, char **argv, int *progfd,
        }
 
        if (*attach_type == BPF_FLOW_DISSECTOR) {
-               *mapfd = -1;
+               *mapfd = 0;
                return 0;
        }
 
index 4e3512f..ce5b65e 100644 (file)
@@ -70,7 +70,7 @@ int BPF_PROG(fentry_XXX)
 static inline void
 fexit_update_maps(u32 id, struct bpf_perf_event_value *after)
 {
-       struct bpf_perf_event_value *before, diff, *accum;
+       struct bpf_perf_event_value *before, diff;
 
        before = bpf_map_lookup_elem(&fentry_readings, &id);
        /* only account samples with a valid fentry_reading */
@@ -95,7 +95,7 @@ int BPF_PROG(fexit_XXX)
 {
        struct bpf_perf_event_value readings[MAX_NUM_MATRICS];
        u32 cpu = bpf_get_smp_processor_id();
-       u32 i, one = 1, zero = 0;
+       u32 i, zero = 0;
        int err;
        u64 *count;
 
index d9b385f..10a4c4c 100644 (file)
@@ -15,6 +15,9 @@
 static inline size_t hash_bits(size_t h, int bits)
 {
        /* shuffle bits and return requested number of upper bits */
+       if (bits == 0)
+               return 0;
+
 #if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__)
        /* LP64 case */
        return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits);
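
The bits == 0 guard closes an undefined-behavior hole: an empty hashmap has cap_bits == 0, and shifting a value right by the full width of its type (64 - 0 here) is undefined in C. A standalone illustration of the guarded helper (assuming 64-bit size_t):

	#include <stdio.h>

	static size_t hash_bits(size_t h, int bits)
	{
		if (bits == 0)
			return 0;	/* avoids a UB shift by 64 */
		return (h * 11400714819323198485llu) >> (64 - bits);
	}

	int main(void)
	{
		printf("%zu\n", hash_bits(12345, 0));	/* safely 0 */
		printf("%zu\n", hash_bits(12345, 8));	/* top 8 bits of the mix */
		return 0;
	}
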
@@ -174,17 +177,17 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value);
  * @key: key to iterate entries for
  */
 #define hashmap__for_each_key_entry(map, cur, _key)                        \
-       for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
-                                            map->cap_bits);                \
-                    map->buckets ? map->buckets[bkt] : NULL; });           \
+       for (cur = map->buckets                                             \
+                    ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+                    : NULL;                                                \
             cur;                                                           \
             cur = cur->next)                                               \
                if (map->equal_fn(cur->key, (_key), map->ctx))
 
 #define hashmap__for_each_key_entry_safe(map, cur, tmp, _key)              \
-       for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
-                                            map->cap_bits);                \
-                    cur = map->buckets ? map->buckets[bkt] : NULL; });     \
+       for (cur = map->buckets                                             \
+                    ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+                    : NULL;                                                \
             cur && ({ tmp = cur->next; true; });                           \
             cur = tmp)                                                     \
                if (map->equal_fn(cur->key, (_key), map->ctx))
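
Both iterator macros now pick the bucket inline in the for-initializer rather than inside a GNU statement expression, and the buckets == NULL (never-grown map) case short-circuits before any indexing. A minimal model of the rewritten shape, with modulo standing in for hash_bits():

	#include <stddef.h>
	#include <stdio.h>

	struct entry { int key; struct entry *next; };

	#define for_each_key_entry(buckets, nbkt, cur, k)			\
		for (cur = (buckets) ? (buckets)[(size_t)(k) % (nbkt)] : NULL;	\
		     cur;							\
		     cur = cur->next)						\
			if (cur->key == (k))

	int main(void)
	{
		struct entry e5 = { .key = 5, .next = NULL };
		struct entry e1 = { .key = 1, .next = &e5 };	/* same bucket */
		struct entry *buckets[4] = { NULL, &e1, NULL, NULL };
		struct entry *cur;

		for_each_key_entry(buckets, 4, cur, 5)
			printf("found key %d\n", cur->key);

		for_each_key_entry((struct entry **)NULL, 4, cur, 5)
			printf("unreachable on an empty map\n");
		return 0;
	}
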
index e3c98c0..9bc537d 100644 (file)
@@ -891,13 +891,16 @@ int xsk_umem__delete(struct xsk_umem *umem)
 void xsk_socket__delete(struct xsk_socket *xsk)
 {
        size_t desc_sz = sizeof(struct xdp_desc);
-       struct xsk_ctx *ctx = xsk->ctx;
        struct xdp_mmap_offsets off;
+       struct xsk_umem *umem;
+       struct xsk_ctx *ctx;
        int err;
 
        if (!xsk)
                return;
 
+       ctx = xsk->ctx;
+       umem = ctx->umem;
        if (ctx->prog_fd != -1) {
                xsk_delete_bpf_maps(xsk);
                close(ctx->prog_fd);
@@ -917,11 +920,11 @@ void xsk_socket__delete(struct xsk_socket *xsk)
 
        xsk_put_ctx(ctx);
 
-       ctx->umem->refcount--;
+       umem->refcount--;
        /* Do not close an fd that also has an associated umem connected
         * to it.
         */
-       if (xsk->fd != ctx->umem->fd)
+       if (xsk->fd != umem->fd)
                close(xsk->fd);
        free(xsk);
 }
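
xsk_put_ctx() can free the ctx, so the function now snapshots the umem pointer up front, and the final refcount/fd handling goes through the snapshot instead of reading back through freed memory. The pattern in miniature:

	#include <stdio.h>
	#include <stdlib.h>

	struct umem { int refcount; };
	struct ctx { struct umem *umem; };

	static void put_ctx(struct ctx *c) { free(c); }	/* may free its container */

	int main(void)
	{
		struct umem *um = calloc(1, sizeof(*um));
		struct ctx *ctx = calloc(1, sizeof(*ctx));

		if (!um || !ctx)
			return 1;
		um->refcount = 1;
		ctx->umem = um;

		struct umem *umem = ctx->umem;	/* snapshot before put_ctx() */
		put_ctx(ctx);
		umem->refcount--;		/* safe: nothing read through freed ctx */

		printf("umem refcount now %d\n", umem->refcount);
		free(um);
		return 0;
	}
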
diff --git a/tools/testing/selftests/bpf/prog_tests/map_init.c b/tools/testing/selftests/bpf/prog_tests/map_init.c
new file mode 100644 (file)
index 0000000..14a3110
--- /dev/null
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Tessares SA <http://www.tessares.net> */
+
+#include <test_progs.h>
+#include "test_map_init.skel.h"
+
+#define TEST_VALUE 0x1234
+#define FILL_VALUE 0xdeadbeef
+
+static int nr_cpus;
+static int duration;
+
+typedef unsigned long long map_key_t;
+typedef unsigned long long map_value_t;
+typedef struct {
+       map_value_t v; /* padding */
+} __bpf_percpu_val_align pcpu_map_value_t;
+
+
+static int map_populate(int map_fd, int num)
+{
+       pcpu_map_value_t value[nr_cpus];
+       int i, err;
+       map_key_t key;
+
+       for (i = 0; i < nr_cpus; i++)
+               bpf_percpu(value, i) = FILL_VALUE;
+
+       for (key = 1; key <= num; key++) {
+               err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST);
+               if (!ASSERT_OK(err, "bpf_map_update_elem"))
+                       return -1;
+       }
+
+       return 0;
+}
+
+static struct test_map_init *setup(enum bpf_map_type map_type, int map_sz,
+                           int *map_fd, int populate)
+{
+       struct test_map_init *skel;
+       int err;
+
+       skel = test_map_init__open();
+       if (!ASSERT_OK_PTR(skel, "skel_open"))
+               return NULL;
+
+       err = bpf_map__set_type(skel->maps.hashmap1, map_type);
+       if (!ASSERT_OK(err, "bpf_map__set_type"))
+               goto error;
+
+       err = bpf_map__set_max_entries(skel->maps.hashmap1, map_sz);
+       if (!ASSERT_OK(err, "bpf_map__set_max_entries"))
+               goto error;
+
+       err = test_map_init__load(skel);
+       if (!ASSERT_OK(err, "skel_load"))
+               goto error;
+
+       *map_fd = bpf_map__fd(skel->maps.hashmap1);
+       if (CHECK(*map_fd < 0, "bpf_map__fd", "failed\n"))
+               goto error;
+
+       err = map_populate(*map_fd, populate);
+       if (!ASSERT_OK(err, "map_populate"))
+               goto error_map;
+
+       return skel;
+
+error_map:
+       close(*map_fd);
+error:
+       test_map_init__destroy(skel);
+       return NULL;
+}
+
+/* executes bpf program that updates map with key, value */
+static int prog_run_insert_elem(struct test_map_init *skel, map_key_t key,
+                               map_value_t value)
+{
+       struct test_map_init__bss *bss;
+
+       bss = skel->bss;
+
+       bss->inKey = key;
+       bss->inValue = value;
+       bss->inPid = getpid();
+
+       if (!ASSERT_OK(test_map_init__attach(skel), "skel_attach"))
+               return -1;
+
+       /* Let tracepoint trigger */
+       syscall(__NR_getpgid);
+
+       test_map_init__detach(skel);
+
+       return 0;
+}
+
+static int check_values_one_cpu(pcpu_map_value_t *value, map_value_t expected)
+{
+       int i, nzCnt = 0;
+       map_value_t val;
+
+       for (i = 0; i < nr_cpus; i++) {
+               val = bpf_percpu(value, i);
+               if (val) {
+                       if (CHECK(val != expected, "map value",
+                                 "unexpected for cpu %d: 0x%llx\n", i, val))
+                               return -1;
+                       nzCnt++;
+               }
+       }
+
+       if (CHECK(nzCnt != 1, "map value", "set for %d CPUs instead of 1!\n",
+                 nzCnt))
+               return -1;
+
+       return 0;
+}
+
+/* Add key=1 elem with values set for all CPUs
+ * Delete elem key=1
+ * Run bpf prog that inserts new key=1 elem with value=0x1234
+ *   (bpf prog can only set value for current CPU)
+ * Lookup key=1 and check value is as expected for all CPUs:
+ *   value set by bpf prog for one CPU, 0 for all others
+ */
+static void test_pcpu_map_init(void)
+{
+       pcpu_map_value_t value[nr_cpus];
+       struct test_map_init *skel;
+       int map_fd, err;
+       map_key_t key;
+
+       /* max 1 elem in map so insertion is forced to reuse freed entry */
+       skel = setup(BPF_MAP_TYPE_PERCPU_HASH, 1, &map_fd, 1);
+       if (!ASSERT_OK_PTR(skel, "prog_setup"))
+               return;
+
+       /* delete element so the entry can be re-used */
+       key = 1;
+       err = bpf_map_delete_elem(map_fd, &key);
+       if (!ASSERT_OK(err, "bpf_map_delete_elem"))
+               goto cleanup;
+
+       /* run bpf prog that inserts new elem, re-using the slot just freed */
+       err = prog_run_insert_elem(skel, key, TEST_VALUE);
+       if (!ASSERT_OK(err, "prog_run_insert_elem"))
+               goto cleanup;
+
+       /* check that key=1 was re-created by bpf prog */
+       err = bpf_map_lookup_elem(map_fd, &key, value);
+       if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+               goto cleanup;
+
+       /* and has expected values */
+       check_values_one_cpu(value, TEST_VALUE);
+
+cleanup:
+       test_map_init__destroy(skel);
+}
+
+/* Add key=1 and key=2 elems with values set for all CPUs
+ * Run bpf prog that inserts new key=3 elem
+ *   (only for current cpu; other cpus should have initial value = 0)
+ * Lookup key=3 and check value is as expected for all CPUs
+ */
+static void test_pcpu_lru_map_init(void)
+{
+       pcpu_map_value_t value[nr_cpus];
+       struct test_map_init *skel;
+       int map_fd, err;
+       map_key_t key;
+
+       /* Set up LRU map with 2 elements, values filled for all CPUs.
+        * With these 2 elements, the LRU map is full
+        */
+       skel = setup(BPF_MAP_TYPE_LRU_PERCPU_HASH, 2, &map_fd, 2);
+       if (!ASSERT_OK_PTR(skel, "prog_setup"))
+               return;
+
+       /* run bpf prog that inserts new key=3 element, re-using LRU slot */
+       key = 3;
+       err = prog_run_insert_elem(skel, key, TEST_VALUE);
+       if (!ASSERT_OK(err, "prog_run_insert_elem"))
+               goto cleanup;
+
+       /* check that key=3 replaced one of the earlier elements */
+       err = bpf_map_lookup_elem(map_fd, &key, value);
+       if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+               goto cleanup;
+
+       /* and has expected values */
+       check_values_one_cpu(value, TEST_VALUE);
+
+cleanup:
+       test_map_init__destroy(skel);
+}
+
+void test_map_init(void)
+{
+       nr_cpus = bpf_num_possible_cpus();
+       if (nr_cpus <= 1) {
+               printf("%s:SKIP: >1 cpu needed for this test\n", __func__);
+               test__skip();
+               return;
+       }
+
+       if (test__start_subtest("pcpu_map_init"))
+               test_pcpu_map_init();
+       if (test__start_subtest("pcpu_lru_map_init"))
+               test_pcpu_lru_map_init();
+}
index 0057831..30982a7 100644 (file)
@@ -243,7 +243,10 @@ static ino_t get_inode_from_kernfs(struct kernfs_node* node)
        }
 }
 
-int pids_cgrp_id = 1;
+extern bool CONFIG_CGROUP_PIDS __kconfig __weak;
+enum cgroup_subsys_id___local {
+       pids_cgrp_id___local = 123, /* value doesn't matter */
+};
 
 static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
                                         struct task_struct* task,
@@ -253,7 +256,9 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
                BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
        struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
 
-       if (ENABLE_CGROUP_V1_RESOLVER) {
+       if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
+               int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
+                                                 pids_cgrp_id___local);
 #ifdef UNROLL
 #pragma unroll
 #endif
@@ -262,7 +267,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
                                BPF_CORE_READ(task, cgroups, subsys[i]);
                        if (subsys != NULL) {
                                int subsys_id = BPF_CORE_READ(subsys, ss, id);
-                               if (subsys_id == pids_cgrp_id) {
+                               if (subsys_id == cgrp_id) {
                                        proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
                                        root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
                                        break;
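
Three CO-RE pieces replace the hard-coded subsystem id: a ___local twin of the kernel enum (libbpf strips everything from the triple underscore when matching, so the 123 placeholder never leaks into the result), bpf_core_enum_value() to pull the real pids_cgrp_id out of the running kernel's BTF at load time, and a weak __kconfig extern that skips the v1 path on kernels built without CONFIG_CGROUP_PIDS. A minimal standalone probe using the same pattern; it assumes a vmlinux.h dump and libbpf's CO-RE helpers, and is a sketch rather than the profiler's own code:

	// SPDX-License-Identifier: GPL-2.0
	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_core_read.h>

	extern bool CONFIG_CGROUP_PIDS __kconfig __weak;

	enum cgroup_subsys_id___local {
		pids_cgrp_id___local = 123,	/* placeholder; relocated at load */
	};

	SEC("tp/syscalls/sys_enter_getpgid")
	int probe(const void *ctx)
	{
		int id;

		if (!CONFIG_CGROUP_PIDS)
			return 0;

		id = bpf_core_enum_value(enum cgroup_subsys_id___local,
					 pids_cgrp_id___local);
		bpf_printk("pids_cgrp_id on this kernel: %d", id);
		return 0;
	}

	char _license[] SEC("license") = "GPL";
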
diff --git a/tools/testing/selftests/bpf/progs/test_map_init.c b/tools/testing/selftests/bpf/progs/test_map_init.c
new file mode 100644 (file)
index 0000000..c89d28e
--- /dev/null
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Tessares SA <http://www.tessares.net> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+__u64 inKey = 0;
+__u64 inValue = 0;
+__u32 inPid = 0;
+
+struct {
+       __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+       __uint(max_entries, 2);
+       __type(key, __u64);
+       __type(value, __u64);
+} hashmap1 SEC(".maps");
+
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int sysenter_getpgid(const void *ctx)
+{
+       /* Just do it once, when called from our own test prog. This
+        * ensures the map value is only updated for a single CPU.
+        */
+       int cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+       if (cur_pid == inPid)
+               bpf_map_update_elem(&hashmap1, &inKey, &inValue, BPF_NOEXIST);
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index bb543bf..361235a 100644 (file)
         ],
         "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop",
         "expExitCode": "0",
-        "verifyCmd": "$TC filter show terse dev $DEV2 ingress",
+        "verifyCmd": "$TC -br filter show dev $DEV2 ingress",
         "matchPattern": "filter protocol ip pref 1 flower.*handle",
         "matchCount": "1",
         "teardown": [
         ],
         "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop",
         "expExitCode": "0",
-        "verifyCmd": "$TC filter show terse dev $DEV2 ingress",
+        "verifyCmd": "$TC -br filter show dev $DEV2 ingress",
         "matchPattern": "  dst_mac e4:11:22:11:4a:51",
         "matchCount": "0",
         "teardown": [