Merge tag 'net-5.13-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
authorLinus Torvalds <torvalds@linux-foundation.org>
Sat, 5 Jun 2021 01:25:39 +0000 (18:25 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sat, 5 Jun 2021 01:25:39 +0000 (18:25 -0700)
Pull networking fixes from Jakub Kicinski:
 "Networking fixes, including fixes from bpf, wireless, netfilter and
  wireguard trees.

  The bpf vs lockdown+audit fix is the most notable.

  Things haven't slowed down just yet, both in terms of regressions in
  current release and largish fixes for older code, but we usually see a
  slowdown only after -rc5.

  Current release - regressions:

   - virtio-net: fix page faults and crashes when XDP is enabled

   - mlx5e: fix HW timestamping with CQE compression, and make sure they
     are only allowed to coexist with capable devices

   - stmmac:
      - fix kernel panic due to NULL pointer dereference of
        mdio_bus_data
      - fix double clk unprepare when no PHY device is connected

  Current release - new code bugs:

   - mt76: a few fixes for the recent MT7921 devices and runtime power
     management

  Previous releases - regressions:

   - ice:
      - track AF_XDP ZC enabled queues in bitmap to fix copy mode Tx
      - fix allowing VF to request more/less queues via virtchnl
      - correct supported and advertised autoneg by using PHY
        capabilities
      - allow all LLDP packets from PF to Tx

   - kbuild: quote OBJCOPY var to avoid a pahole call break the build

  Previous releases - always broken:

   - bpf, lockdown, audit: fix buggy SELinux lockdown permission checks

   - mt76: address the recent FragAttack vulnerabilities not covered by
     generic fixes

   - ipv6: fix KASAN: slab-out-of-bounds Read in
     fib6_nh_flush_exceptions

   - Bluetooth:
      - fix the erroneous flush_work() order, to avoid double free
      - use correct lock to prevent UAF of hdev object

   - nfc: fix NULL ptr dereference in llcp_sock_getname() after failed
     connect

   - ieee802154: multiple fixes to error checking and return values

   - igb: fix XDP with PTP enabled

   - intel: add correct exception tracing for XDP

   - tls: fix use-after-free when TLS offload device goes down and back
     up

   - ipvs: ignore IP_VS_SVC_F_HASHED flag when adding service

   - netfilter: nft_ct: skip expectations for confirmed conntrack

   - mptcp: fix falling back to TCP in presence of out of order packets
     early in connection lifetime

   - wireguard: switch from O(n) to a O(1) algorithm for maintaining
     peers, fixing stalls and a large memory leak in the process

  Misc:

   - devlink: correct VIRTUAL port to not have phys_port attributes

   - Bluetooth: fix VIRTIO_ID_BT assigned number

   - net: return the correct errno code ENOBUF -> ENOMEM

   - wireguard:
      - peer: allocate in kmem_cache saving 25% on peer memory
      - do not use -O3"

* tag 'net-5.13-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (91 commits)
  cxgb4: avoid link re-train during TC-MQPRIO configuration
  sch_htb: fix refcount leak in htb_parent_to_leaf_offload
  wireguard: allowedips: free empty intermediate nodes when removing single node
  wireguard: allowedips: allocate nodes in kmem_cache
  wireguard: allowedips: remove nodes in O(1)
  wireguard: allowedips: initialize list head in selftest
  wireguard: peer: allocate in kmem_cache
  wireguard: use synchronize_net rather than synchronize_rcu
  wireguard: do not use -O3
  wireguard: selftests: make sure rp_filter is disabled on vethc
  wireguard: selftests: remove old conntrack kconfig value
  virtchnl: Add missing padding to virtchnl_proto_hdrs
  ice: Allow all LLDP packets from PF to Tx
  ice: report supported and advertised autoneg using PHY capabilities
  ice: handle the VF VSI rebuild failure
  ice: Fix VFR issues for AVF drivers that expect ATQLEN cleared
  ice: Fix allowing VF to request more/less queues via virtchnl
  virtio-net: fix for skb_over_panic inside big mode
  ipv6: Fix KASAN: slab-out-of-bounds Read in fib6_nh_flush_exceptions
  fib: Return the correct errno code
  ...

102 files changed:
MAINTAINERS
drivers/bluetooth/btusb.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
drivers/net/ethernet/chelsio/cxgb4/sge.c
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/intel/i40e/i40e_xsk.c
drivers/net/ethernet/intel/ice/ice.h
drivers/net/ethernet/intel/ice/ice_ethtool.c
drivers/net/ethernet/intel/ice/ice_hw_autogen.h
drivers/net/ethernet/intel/ice/ice_lib.c
drivers/net/ethernet/intel/ice/ice_txrx.c
drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
drivers/net/ethernet/intel/ice/ice_xsk.c
drivers/net/ethernet/intel/igb/igb.h
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/igb/igb_ptp.c
drivers/net/ethernet/intel/igc/igc_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
drivers/net/ethernet/myricom/myri10ge/myri10ge.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ieee802154/mrf24j40.c
drivers/net/virtio_net.c
drivers/net/wireguard/Makefile
drivers/net/wireguard/allowedips.c
drivers/net/wireguard/allowedips.h
drivers/net/wireguard/main.c
drivers/net/wireguard/peer.c
drivers/net/wireguard/peer.h
drivers/net/wireguard/selftest/allowedips.c
drivers/net/wireguard/socket.c
drivers/net/wireless/mediatek/mt76/mac80211.c
drivers/net/wireless/mediatek/mt76/mt7615/init.c
drivers/net/wireless/mediatek/mt76/mt7615/mac.c
drivers/net/wireless/mediatek/mt76/mt7615/sdio_mcu.c
drivers/net/wireless/mediatek/mt76/mt7615/usb_mcu.c
drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
drivers/net/wireless/mediatek/mt76/mt76x0/pci.c
drivers/net/wireless/mediatek/mt76/mt7921/init.c
drivers/net/wireless/mediatek/mt76/mt7921/mac.c
drivers/net/wireless/mediatek/mt76/mt7921/main.c
drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
include/linux/avf/virtchnl.h
include/linux/mlx5/mlx5_ifc.h
include/net/caif/caif_dev.h
include/net/caif/cfcnfg.h
include/net/caif/cfserl.h
include/net/netfilter/nf_tables.h
include/net/tls.h
include/uapi/linux/virtio_ids.h
kernel/bpf/helpers.c
kernel/trace/bpf_trace.c
net/bluetooth/hci_core.c
net/bluetooth/hci_sock.c
net/caif/caif_dev.c
net/caif/caif_usb.c
net/caif/cfcnfg.c
net/caif/cfserl.c
net/compat.c
net/core/devlink.c
net/core/fib_rules.c
net/core/rtnetlink.c
net/core/sock.c
net/dsa/tag_8021q.c
net/ieee802154/nl-mac.c
net/ieee802154/nl-phy.c
net/ieee802154/nl802154.c
net/ipv4/ipconfig.c
net/ipv6/route.c
net/ipv6/sit.c
net/kcm/kcmsock.c
net/mptcp/protocol.c
net/mptcp/subflow.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/nf_conntrack_proto.c
net/netfilter/nf_tables_api.c
net/netfilter/nfnetlink_cthelper.c
net/netfilter/nft_ct.c
net/nfc/llcp_sock.c
net/sched/act_ct.c
net/sched/sch_htb.c
net/tls/tls_device.c
net/tls/tls_device_fallback.c
net/tls/tls_main.c
net/x25/af_x25.c
scripts/Makefile.modfinal
scripts/link-vmlinux.sh
tools/testing/selftests/net/mptcp/mptcp_connect.sh
tools/testing/selftests/wireguard/netns.sh
tools/testing/selftests/wireguard/qemu/kernel.config

index 32d3130..b706dd2 100644 (file)
@@ -12905,7 +12905,7 @@ F:      net/ipv4/nexthop.c
 
 NFC SUBSYSTEM
 M:     Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
-L:     linux-nfc@lists.01.org (moderated for non-subscribers)
+L:     linux-nfc@lists.01.org (subscribers-only)
 L:     netdev@vger.kernel.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/net/nfc/
@@ -12918,7 +12918,7 @@ F:      net/nfc/
 NFC VIRTUAL NCI DEVICE DRIVER
 M:     Bongsu Jeon <bongsu.jeon@samsung.com>
 L:     netdev@vger.kernel.org
-L:     linux-nfc@lists.01.org (moderated for non-subscribers)
+L:     linux-nfc@lists.01.org (subscribers-only)
 S:     Supported
 F:     drivers/nfc/virtual_ncidev.c
 F:     tools/testing/selftests/nci/
@@ -13216,7 +13216,7 @@ F:      sound/soc/codecs/tfa9879*
 
 NXP-NCI NFC DRIVER
 R:     Charles Gorand <charles.gorand@effinnov.com>
-L:     linux-nfc@lists.01.org (moderated for non-subscribers)
+L:     linux-nfc@lists.01.org (subscribers-only)
 S:     Supported
 F:     drivers/nfc/nxp-nci
 
@@ -16147,7 +16147,7 @@ F:      include/media/drv-intf/s3c_camif.h
 SAMSUNG S3FWRN5 NFC DRIVER
 M:     Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
 M:     Krzysztof Opasiak <k.opasiak@samsung.com>
-L:     linux-nfc@lists.01.org (moderated for non-subscribers)
+L:     linux-nfc@lists.01.org (subscribers-only)
 S:     Maintained
 F:     Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml
 F:     drivers/nfc/s3fwrn5
@@ -18337,7 +18337,7 @@ F:      sound/soc/codecs/tas571x*
 TI TRF7970A NFC DRIVER
 M:     Mark Greer <mgreer@animalcreek.com>
 L:     linux-wireless@vger.kernel.org
-L:     linux-nfc@lists.01.org (moderated for non-subscribers)
+L:     linux-nfc@lists.01.org (subscribers-only)
 S:     Supported
 F:     Documentation/devicetree/bindings/net/nfc/trf7970a.txt
 F:     drivers/nfc/trf7970a.c
index 7a8e1d2..7f6ba2c 100644 (file)
@@ -2529,10 +2529,17 @@ static int btusb_intel_download_firmware_newgen(struct hci_dev *hdev,
        }
 
        btusb_setup_intel_newgen_get_fw_name(ver, fwname, sizeof(fwname), "sfi");
-       err = request_firmware(&fw, fwname, &hdev->dev);
+       err = firmware_request_nowarn(&fw, fwname, &hdev->dev);
        if (err < 0) {
+               if (!test_bit(BTUSB_BOOTLOADER, &data->flags)) {
+                       /* Firmware has already been loaded */
+                       set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
+                       return 0;
+               }
+
                bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)",
                           fwname, err);
+
                return err;
        }
 
@@ -2682,12 +2689,24 @@ download:
        err = btusb_setup_intel_new_get_fw_name(ver, params, fwname,
                                                sizeof(fwname), "sfi");
        if (err < 0) {
+               if (!test_bit(BTUSB_BOOTLOADER, &data->flags)) {
+                       /* Firmware has already been loaded */
+                       set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
+                       return 0;
+               }
+
                bt_dev_err(hdev, "Unsupported Intel firmware naming");
                return -EINVAL;
        }
 
-       err = request_firmware(&fw, fwname, &hdev->dev);
+       err = firmware_request_nowarn(&fw, fwname, &hdev->dev);
        if (err < 0) {
+               if (!test_bit(BTUSB_BOOTLOADER, &data->flags)) {
+                       /* Firmware has already been loaded */
+                       set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
+                       return 0;
+               }
+
                bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)",
                           fwname, err);
                return err;
index 314f8d8..9058f09 100644 (file)
@@ -2177,8 +2177,6 @@ int cxgb4_update_mac_filt(struct port_info *pi, unsigned int viid,
                          bool persistent, u8 *smt_idx);
 int cxgb4_get_msix_idx_from_bmap(struct adapter *adap);
 void cxgb4_free_msix_idx_in_bmap(struct adapter *adap, u32 msix_idx);
-int cxgb_open(struct net_device *dev);
-int cxgb_close(struct net_device *dev);
 void cxgb4_enable_rx(struct adapter *adap, struct sge_rspq *q);
 void cxgb4_quiesce_rx(struct sge_rspq *q);
 int cxgb4_port_mirror_alloc(struct net_device *dev);
index 421bd9b..1f601de 100644 (file)
@@ -2834,7 +2834,7 @@ static void cxgb_down(struct adapter *adapter)
 /*
  * net_device operations
  */
-int cxgb_open(struct net_device *dev)
+static int cxgb_open(struct net_device *dev)
 {
        struct port_info *pi = netdev_priv(dev);
        struct adapter *adapter = pi->adapter;
@@ -2882,7 +2882,7 @@ out_unlock:
        return err;
 }
 
-int cxgb_close(struct net_device *dev)
+static int cxgb_close(struct net_device *dev)
 {
        struct port_info *pi = netdev_priv(dev);
        struct adapter *adapter = pi->adapter;
index 1b88bd1..dd9be22 100644 (file)
@@ -997,20 +997,16 @@ int cxgb4_tc_flower_destroy(struct net_device *dev,
        if (!ch_flower)
                return -ENOENT;
 
+       rhashtable_remove_fast(&adap->flower_tbl, &ch_flower->node,
+                              adap->flower_ht_params);
+
        ret = cxgb4_flow_rule_destroy(dev, ch_flower->fs.tc_prio,
                                      &ch_flower->fs, ch_flower->filter_id);
        if (ret)
-               goto err;
+               netdev_err(dev, "Flow rule destroy failed for tid: %u, ret: %d",
+                          ch_flower->filter_id, ret);
 
-       ret = rhashtable_remove_fast(&adap->flower_tbl, &ch_flower->node,
-                                    adap->flower_ht_params);
-       if (ret) {
-               netdev_err(dev, "Flow remove from rhashtable failed");
-               goto err;
-       }
        kfree_rcu(ch_flower, rcu);
-
-err:
        return ret;
 }
 
index 6c259de..338b04f 100644 (file)
@@ -589,7 +589,8 @@ int cxgb4_setup_tc_mqprio(struct net_device *dev,
         * down before configuring tc params.
         */
        if (netif_running(dev)) {
-               cxgb_close(dev);
+               netif_tx_stop_all_queues(dev);
+               netif_carrier_off(dev);
                needs_bring_up = true;
        }
 
@@ -615,8 +616,10 @@ int cxgb4_setup_tc_mqprio(struct net_device *dev,
        }
 
 out:
-       if (needs_bring_up)
-               cxgb_open(dev);
+       if (needs_bring_up) {
+               netif_tx_start_all_queues(dev);
+               netif_carrier_on(dev);
+       }
 
        mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
        return ret;
index 1e5f2ed..6a099cb 100644 (file)
@@ -2556,6 +2556,12 @@ int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc)
        if (!eosw_txq)
                return -ENOMEM;
 
+       if (!(adap->flags & CXGB4_FW_OK)) {
+               /* Don't stall caller when access to FW is lost */
+               complete(&eosw_txq->completion);
+               return -EIO;
+       }
+
        skb = alloc_skb(len, GFP_KERNEL);
        if (!skb)
                return -ENOMEM;
index de70c16..b883ab8 100644 (file)
@@ -2313,15 +2313,20 @@ static int i40e_run_xdp(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
        case XDP_TX:
                xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
                result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring);
+               if (result == I40E_XDP_CONSUMED)
+                       goto out_failure;
                break;
        case XDP_REDIRECT:
                err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
-               result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED;
+               if (err)
+                       goto out_failure;
+               result = I40E_XDP_REDIR;
                break;
        default:
                bpf_warn_invalid_xdp_action(act);
                fallthrough;
        case XDP_ABORTED:
+out_failure:
                trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
                fallthrough; /* handle aborts by dropping packet */
        case XDP_DROP:
index 46d8844..68f177a 100644 (file)
@@ -162,9 +162,10 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
 
        if (likely(act == XDP_REDIRECT)) {
                err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
-               result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED;
+               if (err)
+                       goto out_failure;
                rcu_read_unlock();
-               return result;
+               return I40E_XDP_REDIR;
        }
 
        switch (act) {
@@ -173,11 +174,14 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
        case XDP_TX:
                xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
                result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring);
+               if (result == I40E_XDP_CONSUMED)
+                       goto out_failure;
                break;
        default:
                bpf_warn_invalid_xdp_action(act);
                fallthrough;
        case XDP_ABORTED:
+out_failure:
                trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
                fallthrough; /* handle aborts by dropping packet */
        case XDP_DROP:
index e35db3f..2924c67 100644 (file)
@@ -335,6 +335,7 @@ struct ice_vsi {
        struct ice_tc_cfg tc_cfg;
        struct bpf_prog *xdp_prog;
        struct ice_ring **xdp_rings;     /* XDP ring array */
+       unsigned long *af_xdp_zc_qps;    /* tracks AF_XDP ZC enabled qps */
        u16 num_xdp_txq;                 /* Used XDP queues */
        u8 xdp_mapping_mode;             /* ICE_MAP_MODE_[CONTIG|SCATTER] */
 
@@ -547,15 +548,16 @@ static inline void ice_set_ring_xdp(struct ice_ring *ring)
  */
 static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_ring *ring)
 {
+       struct ice_vsi *vsi = ring->vsi;
        u16 qid = ring->q_index;
 
        if (ice_ring_is_xdp(ring))
-               qid -= ring->vsi->num_xdp_txq;
+               qid -= vsi->num_xdp_txq;
 
-       if (!ice_is_xdp_ena_vsi(ring->vsi))
+       if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps))
                return NULL;
 
-       return xsk_get_pool_from_qid(ring->vsi->netdev, qid);
+       return xsk_get_pool_from_qid(vsi->netdev, qid);
 }
 
 /**
index d9ddd0b..99301ad 100644 (file)
@@ -1773,49 +1773,6 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
                ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB,
                                                100000baseKR4_Full);
        }
-
-       /* Autoneg PHY types */
-       if (phy_types_low & ICE_PHY_TYPE_LOW_100BASE_TX ||
-           phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_T ||
-           phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_KX ||
-           phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_T ||
-           phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_KX ||
-           phy_types_low & ICE_PHY_TYPE_LOW_5GBASE_T ||
-           phy_types_low & ICE_PHY_TYPE_LOW_5GBASE_KR ||
-           phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_T ||
-           phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_KR_CR1 ||
-           phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_T ||
-           phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR ||
-           phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR_S ||
-           phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR1 ||
-           phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR ||
-           phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR_S ||
-           phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR1 ||
-           phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_CR4 ||
-           phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_KR4) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    Autoneg);
-               ethtool_link_ksettings_add_link_mode(ks, advertising,
-                                                    Autoneg);
-       }
-       if (phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CR2 ||
-           phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR2 ||
-           phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CP ||
-           phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    Autoneg);
-               ethtool_link_ksettings_add_link_mode(ks, advertising,
-                                                    Autoneg);
-       }
-       if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CR4 ||
-           phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR4 ||
-           phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 ||
-           phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CP2) {
-               ethtool_link_ksettings_add_link_mode(ks, supported,
-                                                    Autoneg);
-               ethtool_link_ksettings_add_link_mode(ks, advertising,
-                                                    Autoneg);
-       }
 }
 
 #define TEST_SET_BITS_TIMEOUT  50
@@ -1972,9 +1929,7 @@ ice_get_link_ksettings(struct net_device *netdev,
                ks->base.port = PORT_TP;
                break;
        case ICE_MEDIA_BACKPLANE:
-               ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
                ethtool_link_ksettings_add_link_mode(ks, supported, Backplane);
-               ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
                ethtool_link_ksettings_add_link_mode(ks, advertising,
                                                     Backplane);
                ks->base.port = PORT_NONE;
@@ -2049,6 +2004,12 @@ ice_get_link_ksettings(struct net_device *netdev,
        if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN)
                ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS);
 
+       /* Set supported and advertised autoneg */
+       if (ice_is_phy_caps_an_enabled(caps)) {
+               ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+               ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+       }
+
 done:
        kfree(caps);
        return err;
index de38a0f..9b8300d 100644 (file)
@@ -31,6 +31,7 @@
 #define PF_FW_ATQLEN_ATQOVFL_M                 BIT(29)
 #define PF_FW_ATQLEN_ATQCRIT_M                 BIT(30)
 #define VF_MBX_ARQLEN(_VF)                     (0x0022BC00 + ((_VF) * 4))
+#define VF_MBX_ATQLEN(_VF)                     (0x0022A800 + ((_VF) * 4))
 #define PF_FW_ATQLEN_ATQENABLE_M               BIT(31)
 #define PF_FW_ATQT                             0x00080400
 #define PF_MBX_ARQBAH                          0x0022E400
index 82e2ce2..d70ee57 100644 (file)
@@ -105,8 +105,14 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
        if (!vsi->q_vectors)
                goto err_vectors;
 
+       vsi->af_xdp_zc_qps = bitmap_zalloc(max_t(int, vsi->alloc_txq, vsi->alloc_rxq), GFP_KERNEL);
+       if (!vsi->af_xdp_zc_qps)
+               goto err_zc_qps;
+
        return 0;
 
+err_zc_qps:
+       devm_kfree(dev, vsi->q_vectors);
 err_vectors:
        devm_kfree(dev, vsi->rxq_map);
 err_rxq_map:
@@ -194,6 +200,8 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id)
                break;
        case ICE_VSI_VF:
                vf = &pf->vf[vsi->vf_id];
+               if (vf->num_req_qs)
+                       vf->num_vf_qs = vf->num_req_qs;
                vsi->alloc_txq = vf->num_vf_qs;
                vsi->alloc_rxq = vf->num_vf_qs;
                /* pf->num_msix_per_vf includes (VF miscellaneous vector +
@@ -288,6 +296,10 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi)
 
        dev = ice_pf_to_dev(pf);
 
+       if (vsi->af_xdp_zc_qps) {
+               bitmap_free(vsi->af_xdp_zc_qps);
+               vsi->af_xdp_zc_qps = NULL;
+       }
        /* free the ring and vector containers */
        if (vsi->q_vectors) {
                devm_kfree(dev, vsi->q_vectors);
index e2b4b29..04748aa 100644 (file)
@@ -523,7 +523,7 @@ ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp,
            struct bpf_prog *xdp_prog)
 {
        struct ice_ring *xdp_ring;
-       int err;
+       int err, result;
        u32 act;
 
        act = bpf_prog_run_xdp(xdp_prog, xdp);
@@ -532,14 +532,20 @@ ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp,
                return ICE_XDP_PASS;
        case XDP_TX:
                xdp_ring = rx_ring->vsi->xdp_rings[smp_processor_id()];
-               return ice_xmit_xdp_buff(xdp, xdp_ring);
+               result = ice_xmit_xdp_buff(xdp, xdp_ring);
+               if (result == ICE_XDP_CONSUMED)
+                       goto out_failure;
+               return result;
        case XDP_REDIRECT:
                err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
-               return !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
+               if (err)
+                       goto out_failure;
+               return ICE_XDP_REDIR;
        default:
                bpf_warn_invalid_xdp_action(act);
                fallthrough;
        case XDP_ABORTED:
+out_failure:
                trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
                fallthrough;
        case XDP_DROP:
@@ -2143,6 +2149,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
        struct ice_tx_offload_params offload = { 0 };
        struct ice_vsi *vsi = tx_ring->vsi;
        struct ice_tx_buf *first;
+       struct ethhdr *eth;
        unsigned int count;
        int tso, csum;
 
@@ -2189,7 +2196,9 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
                goto out_drop;
 
        /* allow CONTROL frames egress from main VSI if FW LLDP disabled */
-       if (unlikely(skb->priority == TC_PRIO_CONTROL &&
+       eth = (struct ethhdr *)skb_mac_header(skb);
+       if (unlikely((skb->priority == TC_PRIO_CONTROL ||
+                     eth->h_proto == htons(ETH_P_LLDP)) &&
                     vsi->type == ICE_VSI_PF &&
                     vsi->port_info->qos_cfg.is_sw_lldp))
                offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
index a1d22d2..97a46c6 100644 (file)
@@ -713,13 +713,15 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr)
         */
        clear_bit(ICE_VF_STATE_INIT, vf->vf_states);
 
-       /* VF_MBX_ARQLEN is cleared by PFR, so the driver needs to clear it
-        * in the case of VFR. If this is done for PFR, it can mess up VF
-        * resets because the VF driver may already have started cleanup
-        * by the time we get here.
+       /* VF_MBX_ARQLEN and VF_MBX_ATQLEN are cleared by PFR, so the driver
+        * needs to clear them in the case of VFR/VFLR. If this is done for
+        * PFR, it can mess up VF resets because the VF driver may already
+        * have started cleanup by the time we get here.
         */
-       if (!is_pfr)
+       if (!is_pfr) {
                wr32(hw, VF_MBX_ARQLEN(vf->vf_id), 0);
+               wr32(hw, VF_MBX_ATQLEN(vf->vf_id), 0);
+       }
 
        /* In the case of a VFLR, the HW has already reset the VF and we
         * just need to clean up, so don't hit the VFRTRIG register.
@@ -1698,7 +1700,12 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
                ice_vf_ctrl_vsi_release(vf);
 
        ice_vf_pre_vsi_rebuild(vf);
-       ice_vf_rebuild_vsi_with_release(vf);
+
+       if (ice_vf_rebuild_vsi_with_release(vf)) {
+               dev_err(dev, "Failed to release and setup the VF%u's VSI\n", vf->vf_id);
+               return false;
+       }
+
        ice_vf_post_vsi_rebuild(vf);
 
        /* if the VF has been reset allow it to come up again */
index faa7b8d..a1f89ea 100644 (file)
@@ -270,6 +270,7 @@ static int ice_xsk_pool_disable(struct ice_vsi *vsi, u16 qid)
        if (!pool)
                return -EINVAL;
 
+       clear_bit(qid, vsi->af_xdp_zc_qps);
        xsk_pool_dma_unmap(pool, ICE_RX_DMA_ATTR);
 
        return 0;
@@ -300,6 +301,8 @@ ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
        if (err)
                return err;
 
+       set_bit(qid, vsi->af_xdp_zc_qps);
+
        return 0;
 }
 
@@ -473,9 +476,10 @@ ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp)
 
        if (likely(act == XDP_REDIRECT)) {
                err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
-               result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
+               if (err)
+                       goto out_failure;
                rcu_read_unlock();
-               return result;
+               return ICE_XDP_REDIR;
        }
 
        switch (act) {
@@ -484,11 +488,14 @@ ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp)
        case XDP_TX:
                xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->q_index];
                result = ice_xmit_xdp_buff(xdp, xdp_ring);
+               if (result == ICE_XDP_CONSUMED)
+                       goto out_failure;
                break;
        default:
                bpf_warn_invalid_xdp_action(act);
                fallthrough;
        case XDP_ABORTED:
+out_failure:
                trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
                fallthrough;
        case XDP_DROP:
index 7bda8c5..2d3daf0 100644 (file)
@@ -749,7 +749,7 @@ void igb_ptp_rx_hang(struct igb_adapter *adapter);
 void igb_ptp_tx_hang(struct igb_adapter *adapter);
 void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb);
 int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
-                       struct sk_buff *skb);
+                       ktime_t *timestamp);
 int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr);
 int igb_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr);
 void igb_set_flag_queue_pairs(struct igb_adapter *, const u32);
index 038a9fd..b2a042f 100644 (file)
@@ -8280,7 +8280,7 @@ static void igb_add_rx_frag(struct igb_ring *rx_ring,
 static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
                                         struct igb_rx_buffer *rx_buffer,
                                         struct xdp_buff *xdp,
-                                        union e1000_adv_rx_desc *rx_desc)
+                                        ktime_t timestamp)
 {
 #if (PAGE_SIZE < 8192)
        unsigned int truesize = igb_rx_pg_size(rx_ring) / 2;
@@ -8300,12 +8300,8 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
        if (unlikely(!skb))
                return NULL;
 
-       if (unlikely(igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP))) {
-               if (!igb_ptp_rx_pktstamp(rx_ring->q_vector, xdp->data, skb)) {
-                       xdp->data += IGB_TS_HDR_LEN;
-                       size -= IGB_TS_HDR_LEN;
-               }
-       }
+       if (timestamp)
+               skb_hwtstamps(skb)->hwtstamp = timestamp;
 
        /* Determine available headroom for copy */
        headlen = size;
@@ -8336,7 +8332,7 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
 static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,
                                     struct igb_rx_buffer *rx_buffer,
                                     struct xdp_buff *xdp,
-                                    union e1000_adv_rx_desc *rx_desc)
+                                    ktime_t timestamp)
 {
 #if (PAGE_SIZE < 8192)
        unsigned int truesize = igb_rx_pg_size(rx_ring) / 2;
@@ -8363,11 +8359,8 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,
        if (metasize)
                skb_metadata_set(skb, metasize);
 
-       /* pull timestamp out of packet data */
-       if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
-               if (!igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb))
-                       __skb_pull(skb, IGB_TS_HDR_LEN);
-       }
+       if (timestamp)
+               skb_hwtstamps(skb)->hwtstamp = timestamp;
 
        /* update buffer offset */
 #if (PAGE_SIZE < 8192)
@@ -8401,18 +8394,20 @@ static struct sk_buff *igb_run_xdp(struct igb_adapter *adapter,
                break;
        case XDP_TX:
                result = igb_xdp_xmit_back(adapter, xdp);
+               if (result == IGB_XDP_CONSUMED)
+                       goto out_failure;
                break;
        case XDP_REDIRECT:
                err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
-               if (!err)
-                       result = IGB_XDP_REDIR;
-               else
-                       result = IGB_XDP_CONSUMED;
+               if (err)
+                       goto out_failure;
+               result = IGB_XDP_REDIR;
                break;
        default:
                bpf_warn_invalid_xdp_action(act);
                fallthrough;
        case XDP_ABORTED:
+out_failure:
                trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
                fallthrough;
        case XDP_DROP:
@@ -8682,7 +8677,10 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
        while (likely(total_packets < budget)) {
                union e1000_adv_rx_desc *rx_desc;
                struct igb_rx_buffer *rx_buffer;
+               ktime_t timestamp = 0;
+               int pkt_offset = 0;
                unsigned int size;
+               void *pktbuf;
 
                /* return some buffers to hardware, one at a time is too slow */
                if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
@@ -8702,14 +8700,24 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
                dma_rmb();
 
                rx_buffer = igb_get_rx_buffer(rx_ring, size, &rx_buf_pgcnt);
+               pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset;
+
+               /* pull rx packet timestamp if available and valid */
+               if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
+                       int ts_hdr_len;
+
+                       ts_hdr_len = igb_ptp_rx_pktstamp(rx_ring->q_vector,
+                                                        pktbuf, &timestamp);
+
+                       pkt_offset += ts_hdr_len;
+                       size -= ts_hdr_len;
+               }
 
                /* retrieve a buffer from the ring */
                if (!skb) {
-                       unsigned int offset = igb_rx_offset(rx_ring);
-                       unsigned char *hard_start;
+                       unsigned char *hard_start = pktbuf - igb_rx_offset(rx_ring);
+                       unsigned int offset = pkt_offset + igb_rx_offset(rx_ring);
 
-                       hard_start = page_address(rx_buffer->page) +
-                                    rx_buffer->page_offset - offset;
                        xdp_prepare_buff(&xdp, hard_start, offset, size, true);
 #if (PAGE_SIZE > 4096)
                        /* At larger PAGE_SIZE, frame_sz depend on len size */
@@ -8732,10 +8740,11 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
                } else if (skb)
                        igb_add_rx_frag(rx_ring, rx_buffer, skb, size);
                else if (ring_uses_build_skb(rx_ring))
-                       skb = igb_build_skb(rx_ring, rx_buffer, &xdp, rx_desc);
+                       skb = igb_build_skb(rx_ring, rx_buffer, &xdp,
+                                           timestamp);
                else
                        skb = igb_construct_skb(rx_ring, rx_buffer,
-                                               &xdp, rx_desc);
+                                               &xdp, timestamp);
 
                /* exit if we failed to retrieve a buffer */
                if (!skb) {
index ba61fe9..d68cd44 100644 (file)
@@ -856,30 +856,28 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)
        dev_kfree_skb_any(skb);
 }
 
-#define IGB_RET_PTP_DISABLED 1
-#define IGB_RET_PTP_INVALID 2
-
 /**
  * igb_ptp_rx_pktstamp - retrieve Rx per packet timestamp
  * @q_vector: Pointer to interrupt specific structure
  * @va: Pointer to address containing Rx buffer
- * @skb: Buffer containing timestamp and packet
+ * @timestamp: Pointer where timestamp will be stored
  *
  * This function is meant to retrieve a timestamp from the first buffer of an
  * incoming frame.  The value is stored in little endian format starting on
  * byte 8
  *
- * Returns: 0 if success, nonzero if failure
+ * Returns: The timestamp header length or 0 if not available
  **/
 int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
-                       struct sk_buff *skb)
+                       ktime_t *timestamp)
 {
        struct igb_adapter *adapter = q_vector->adapter;
+       struct skb_shared_hwtstamps ts;
        __le64 *regval = (__le64 *)va;
        int adjust = 0;
 
        if (!(adapter->ptp_flags & IGB_PTP_ENABLED))
-               return IGB_RET_PTP_DISABLED;
+               return 0;
 
        /* The timestamp is recorded in little endian format.
         * DWORD: 0        1        2        3
@@ -888,10 +886,9 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
 
        /* check reserved dwords are zero, be/le doesn't matter for zero */
        if (regval[0])
-               return IGB_RET_PTP_INVALID;
+               return 0;
 
-       igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb),
-                                  le64_to_cpu(regval[1]));
+       igb_ptp_systim_to_hwtstamp(adapter, &ts, le64_to_cpu(regval[1]));
 
        /* adjust timestamp for the RX latency based on link speed */
        if (adapter->hw.mac.type == e1000_i210) {
@@ -907,10 +904,10 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
                        break;
                }
        }
-       skb_hwtstamps(skb)->hwtstamp =
-               ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
 
-       return 0;
+       *timestamp = ktime_sub_ns(ts.hwtstamp, adjust);
+
+       return IGB_TS_HDR_LEN;
 }
 
 /**
index 069471b..f1adf15 100644 (file)
@@ -2047,20 +2047,19 @@ static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
                break;
        case XDP_TX:
                if (igc_xdp_xmit_back(adapter, xdp) < 0)
-                       res = IGC_XDP_CONSUMED;
-               else
-                       res = IGC_XDP_TX;
+                       goto out_failure;
+               res = IGC_XDP_TX;
                break;
        case XDP_REDIRECT:
                if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0)
-                       res = IGC_XDP_CONSUMED;
-               else
-                       res = IGC_XDP_REDIRECT;
+                       goto out_failure;
+               res = IGC_XDP_REDIRECT;
                break;
        default:
                bpf_warn_invalid_xdp_action(act);
                fallthrough;
        case XDP_ABORTED:
+out_failure:
                trace_xdp_exception(adapter->netdev, prog, act);
                fallthrough;
        case XDP_DROP:
index c5ec17d..2ac5b82 100644 (file)
@@ -2213,23 +2213,23 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
                break;
        case XDP_TX:
                xdpf = xdp_convert_buff_to_frame(xdp);
-               if (unlikely(!xdpf)) {
-                       result = IXGBE_XDP_CONSUMED;
-                       break;
-               }
+               if (unlikely(!xdpf))
+                       goto out_failure;
                result = ixgbe_xmit_xdp_ring(adapter, xdpf);
+               if (result == IXGBE_XDP_CONSUMED)
+                       goto out_failure;
                break;
        case XDP_REDIRECT:
                err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
-               if (!err)
-                       result = IXGBE_XDP_REDIR;
-               else
-                       result = IXGBE_XDP_CONSUMED;
+               if (err)
+                       goto out_failure;
+               result = IXGBE_XDP_REDIR;
                break;
        default:
                bpf_warn_invalid_xdp_action(act);
                fallthrough;
        case XDP_ABORTED:
+out_failure:
                trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
                fallthrough; /* handle aborts by dropping packet */
        case XDP_DROP:
index 91ad5b9..f72d297 100644 (file)
@@ -106,9 +106,10 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
 
        if (likely(act == XDP_REDIRECT)) {
                err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
-               result = !err ? IXGBE_XDP_REDIR : IXGBE_XDP_CONSUMED;
+               if (err)
+                       goto out_failure;
                rcu_read_unlock();
-               return result;
+               return IXGBE_XDP_REDIR;
        }
 
        switch (act) {
@@ -116,16 +117,17 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
                break;
        case XDP_TX:
                xdpf = xdp_convert_buff_to_frame(xdp);
-               if (unlikely(!xdpf)) {
-                       result = IXGBE_XDP_CONSUMED;
-                       break;
-               }
+               if (unlikely(!xdpf))
+                       goto out_failure;
                result = ixgbe_xmit_xdp_ring(adapter, xdpf);
+               if (result == IXGBE_XDP_CONSUMED)
+                       goto out_failure;
                break;
        default:
                bpf_warn_invalid_xdp_action(act);
                fallthrough;
        case XDP_ABORTED:
+out_failure:
                trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
                fallthrough; /* handle aborts by dropping packet */
        case XDP_DROP:
index ba2ed8a..0e733cc 100644 (file)
@@ -1067,11 +1067,14 @@ static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_adapter *adapter,
        case XDP_TX:
                xdp_ring = adapter->xdp_ring[rx_ring->queue_index];
                result = ixgbevf_xmit_xdp_ring(xdp_ring, xdp);
+               if (result == IXGBEVF_XDP_CONSUMED)
+                       goto out_failure;
                break;
        default:
                bpf_warn_invalid_xdp_action(act);
                fallthrough;
        case XDP_ABORTED:
+out_failure:
                trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
                fallthrough; /* handle aborts by dropping packet */
        case XDP_DROP:
index 8360289..d6513ae 100644 (file)
@@ -1624,12 +1624,13 @@ static int mlx5e_set_fecparam(struct net_device *netdev,
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
        struct mlx5_core_dev *mdev = priv->mdev;
+       unsigned long fec_bitmap;
        u16 fec_policy = 0;
        int mode;
        int err;
 
-       if (bitmap_weight((unsigned long *)&fecparam->fec,
-                         ETHTOOL_FEC_LLRS_BIT + 1) > 1)
+       bitmap_from_arr32(&fec_bitmap, &fecparam->fec, sizeof(fecparam->fec) * BITS_PER_BYTE);
+       if (bitmap_weight(&fec_bitmap, ETHTOOL_FEC_LLRS_BIT + 1) > 1)
                return -EOPNOTSUPP;
 
        for (mode = 0; mode < ARRAY_SIZE(pplm_fec_2_ethtool); mode++) {
@@ -1893,6 +1894,13 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
        if (curr_val == new_val)
                return 0;
 
+       if (new_val && !priv->profile->rx_ptp_support &&
+           priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) {
+               netdev_err(priv->netdev,
+                          "Profile doesn't support enabling of CQE compression while hardware time-stamping is enabled.\n");
+               return -EINVAL;
+       }
+
        new_params = priv->channels.params;
        MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
        if (priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE)
index ad0f694..ec6bafe 100644 (file)
@@ -3858,6 +3858,16 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
                        netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
        }
 
+       if (mlx5e_is_uplink_rep(priv)) {
+               features &= ~NETIF_F_HW_TLS_RX;
+               if (netdev->features & NETIF_F_HW_TLS_RX)
+                       netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n");
+
+               features &= ~NETIF_F_HW_TLS_TX;
+               if (netdev->features & NETIF_F_HW_TLS_TX)
+                       netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n");
+       }
+
        mutex_unlock(&priv->state_lock);
 
        return features;
@@ -3974,11 +3984,45 @@ int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx)
        return mlx5e_ptp_rx_manage_fs(priv, set);
 }
 
-int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
+static int mlx5e_hwstamp_config_no_ptp_rx(struct mlx5e_priv *priv, bool rx_filter)
+{
+       bool rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def;
+       int err;
+
+       if (!rx_filter)
+               /* Reset CQE compression to Admin default */
+               return mlx5e_modify_rx_cqe_compression_locked(priv, rx_cqe_compress_def);
+
+       if (!MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS))
+               return 0;
+
+       /* Disable CQE compression */
+       netdev_warn(priv->netdev, "Disabling RX cqe compression\n");
+       err = mlx5e_modify_rx_cqe_compression_locked(priv, false);
+       if (err)
+               netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err);
+
+       return err;
+}
+
+static int mlx5e_hwstamp_config_ptp_rx(struct mlx5e_priv *priv, bool ptp_rx)
 {
        struct mlx5e_params new_params;
+
+       if (ptp_rx == priv->channels.params.ptp_rx)
+               return 0;
+
+       new_params = priv->channels.params;
+       new_params.ptp_rx = ptp_rx;
+       return mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx,
+                                       &new_params.ptp_rx, true);
+}
+
+int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
+{
        struct hwtstamp_config config;
        bool rx_cqe_compress_def;
+       bool ptp_rx;
        int err;
 
        if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) ||
@@ -3998,13 +4042,12 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
        }
 
        mutex_lock(&priv->state_lock);
-       new_params = priv->channels.params;
        rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def;
 
        /* RX HW timestamp */
        switch (config.rx_filter) {
        case HWTSTAMP_FILTER_NONE:
-               new_params.ptp_rx = false;
+               ptp_rx = false;
                break;
        case HWTSTAMP_FILTER_ALL:
        case HWTSTAMP_FILTER_SOME:
@@ -4021,24 +4064,25 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
        case HWTSTAMP_FILTER_PTP_V2_SYNC:
        case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
        case HWTSTAMP_FILTER_NTP_ALL:
-               new_params.ptp_rx = rx_cqe_compress_def;
                config.rx_filter = HWTSTAMP_FILTER_ALL;
+               /* ptp_rx is set if both HW TS is set and CQE
+                * compression is set
+                */
+               ptp_rx = rx_cqe_compress_def;
                break;
        default:
-               mutex_unlock(&priv->state_lock);
-               return -ERANGE;
+               err = -ERANGE;
+               goto err_unlock;
        }
 
-       if (new_params.ptp_rx == priv->channels.params.ptp_rx)
-               goto out;
+       if (!priv->profile->rx_ptp_support)
+               err = mlx5e_hwstamp_config_no_ptp_rx(priv,
+                                                    config.rx_filter != HWTSTAMP_FILTER_NONE);
+       else
+               err = mlx5e_hwstamp_config_ptp_rx(priv, ptp_rx);
+       if (err)
+               goto err_unlock;
 
-       err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx,
-                                      &new_params.ptp_rx, true);
-       if (err) {
-               mutex_unlock(&priv->state_lock);
-               return err;
-       }
-out:
        memcpy(&priv->tstamp, &config, sizeof(config));
        mutex_unlock(&priv->state_lock);
 
@@ -4047,6 +4091,9 @@ out:
 
        return copy_to_user(ifr->ifr_data, &config,
                            sizeof(config)) ? -EFAULT : 0;
+err_unlock:
+       mutex_unlock(&priv->state_lock);
+       return err;
 }
 
 int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr)
index 2c776e7..dd64878 100644 (file)
@@ -2015,11 +2015,13 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
                                    misc_parameters_3);
        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
        struct flow_dissector *dissector = rule->match.dissector;
+       enum fs_flow_table_type fs_type;
        u16 addr_type = 0;
        u8 ip_proto = 0;
        u8 *match_level;
        int err;
 
+       fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX;
        match_level = outer_match_level;
 
        if (dissector->used_keys &
@@ -2145,6 +2147,13 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
                if (match.mask->vlan_id ||
                    match.mask->vlan_priority ||
                    match.mask->vlan_tpid) {
+                       if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid,
+                                                    fs_type)) {
+                               NL_SET_ERR_MSG_MOD(extack,
+                                                  "Matching on CVLAN is not supported");
+                               return -EOPNOTSUPP;
+                       }
+
                        if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
                                MLX5_SET(fte_match_set_misc, misc_c,
                                         outer_second_svlan_tag, 1);
index db1e742..d18a28a 100644 (file)
@@ -219,7 +219,8 @@ esw_setup_slow_path_dest(struct mlx5_flow_destination *dest,
                         struct mlx5_fs_chains *chains,
                         int i)
 {
-       flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+       if (mlx5_chains_ignore_flow_level_supported(chains))
+               flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
        dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
        dest[i].ft = mlx5_chains_get_tc_end_ft(chains);
 }
index d5d5763..106b50e 100644 (file)
@@ -349,6 +349,9 @@ static void mlx5_sync_reset_abort_event(struct work_struct *work)
                                                      reset_abort_work);
        struct mlx5_core_dev *dev = fw_reset->dev;
 
+       if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags))
+               return;
+
        mlx5_sync_reset_clear_reset_requested(dev, true);
        mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n");
 }
index 00ef10a..20a4047 100644 (file)
@@ -107,7 +107,7 @@ bool mlx5_chains_prios_supported(struct mlx5_fs_chains *chains)
        return chains->flags & MLX5_CHAINS_AND_PRIOS_SUPPORTED;
 }
 
-static bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains)
+bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains)
 {
        return chains->flags & MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
 }
index e96f345..d50bdb2 100644 (file)
@@ -28,6 +28,7 @@ struct mlx5_chains_attr {
 
 bool
 mlx5_chains_prios_supported(struct mlx5_fs_chains *chains);
+bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains);
 bool
 mlx5_chains_backwards_supported(struct mlx5_fs_chains *chains);
 u32
@@ -70,6 +71,10 @@ mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains,
 
 #else /* CONFIG_MLX5_CLS_ACT */
 
+static inline bool
+mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains)
+{ return false; }
+
 static inline struct mlx5_flow_table *
 mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
                      u32 level) { return ERR_PTR(-EOPNOTSUPP); }
index 1fbcd01..7ccfd40 100644 (file)
@@ -112,7 +112,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
        int ret;
 
        ft_attr.table_type = MLX5_FLOW_TABLE_TYPE_FDB;
-       ft_attr.level = dmn->info.caps.max_ft_level - 2;
+       ft_attr.level = min_t(int, dmn->info.caps.max_ft_level - 2,
+                             MLX5_FT_MAX_MULTIPATH_LEVEL);
        ft_attr.reformat_en = reformat_req;
        ft_attr.decap_en = reformat_req;
 
index c84c8bf..fc99ad8 100644 (file)
@@ -3815,6 +3815,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                dev_err(&pdev->dev,
                        "invalid sram_size %dB or board span %ldB\n",
                        mgp->sram_size, mgp->board_span);
+               status = -EINVAL;
                goto abort_with_ioremap;
        }
        memcpy_fromio(mgp->eeprom_strings,
index 5d956a5..c87202c 100644 (file)
@@ -1240,8 +1240,9 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
        priv->phylink_config.dev = &priv->dev->dev;
        priv->phylink_config.type = PHYLINK_NETDEV;
        priv->phylink_config.pcs_poll = true;
-       priv->phylink_config.ovr_an_inband =
-               priv->plat->mdio_bus_data->xpcs_an_inband;
+       if (priv->plat->mdio_bus_data)
+               priv->phylink_config.ovr_an_inband =
+                       priv->plat->mdio_bus_data->xpcs_an_inband;
 
        if (!fwnode)
                fwnode = dev_fwnode(priv->device);
@@ -7048,7 +7049,6 @@ error_mdio_register:
        stmmac_napi_del(ndev);
 error_hw_init:
        destroy_workqueue(priv->wq);
-       stmmac_bus_clks_config(priv, false);
        bitmap_free(priv->af_xdp_zc_qps);
 
        return ret;
index b9be530..ff83e00 100644 (file)
@@ -8,8 +8,8 @@
 
 #include <linux/spi/spi.h>
 #include <linux/interrupt.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/ieee802154.h>
 #include <linux/irq.h>
@@ -1388,7 +1388,7 @@ MODULE_DEVICE_TABLE(spi, mrf24j40_ids);
 
 static struct spi_driver mrf24j40_driver = {
        .driver = {
-               .of_match_table = of_match_ptr(mrf24j40_of_match),
+               .of_match_table = mrf24j40_of_match,
                .name = "mrf24j40",
        },
        .id_table = mrf24j40_ids,
index 9b6a4a8..78a01c7 100644 (file)
@@ -401,18 +401,13 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
        /* If headroom is not 0, there is an offset between the beginning of the
         * data and the allocated space, otherwise the data and the allocated
         * space are aligned.
+        *
+        * Buffers with headroom use PAGE_SIZE as alloc size, see
+        * add_recvbuf_mergeable() + get_mergeable_buf_len()
         */
-       if (headroom) {
-               /* Buffers with headroom use PAGE_SIZE as alloc size,
-                * see add_recvbuf_mergeable() + get_mergeable_buf_len()
-                */
-               truesize = PAGE_SIZE;
-               tailroom = truesize - len - offset;
-               buf = page_address(page);
-       } else {
-               tailroom = truesize - len;
-               buf = p;
-       }
+       truesize = headroom ? PAGE_SIZE : truesize;
+       tailroom = truesize - len - headroom - (hdr_padded_len - hdr_len);
+       buf = p - headroom;
 
        len -= hdr_len;
        offset += hdr_padded_len;
@@ -958,7 +953,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                                put_page(page);
                                head_skb = page_to_skb(vi, rq, xdp_page, offset,
                                                       len, PAGE_SIZE, false,
-                                                      metasize, headroom);
+                                                      metasize,
+                                                      VIRTIO_XDP_HEADROOM);
                                return head_skb;
                        }
                        break;
index fc52b2c..dbe1f85 100644 (file)
@@ -1,5 +1,4 @@
-ccflags-y := -O3
-ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt'
+ccflags-y := -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt'
 ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG
 wireguard-y := main.o
 wireguard-y += noise.o
index 3725e9c..b7197e8 100644 (file)
@@ -6,6 +6,8 @@
 #include "allowedips.h"
 #include "peer.h"
 
+static struct kmem_cache *node_cache;
+
 static void swap_endian(u8 *dst, const u8 *src, u8 bits)
 {
        if (bits == 32) {
@@ -28,8 +30,11 @@ static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src,
        node->bitlen = bits;
        memcpy(node->bits, src, bits / 8U);
 }
-#define CHOOSE_NODE(parent, key) \
-       parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1]
+
+static inline u8 choose(struct allowedips_node *node, const u8 *key)
+{
+       return (key[node->bit_at_a] >> node->bit_at_b) & 1;
+}
 
 static void push_rcu(struct allowedips_node **stack,
                     struct allowedips_node __rcu *p, unsigned int *len)
@@ -40,6 +45,11 @@ static void push_rcu(struct allowedips_node **stack,
        }
 }
 
+static void node_free_rcu(struct rcu_head *rcu)
+{
+       kmem_cache_free(node_cache, container_of(rcu, struct allowedips_node, rcu));
+}
+
 static void root_free_rcu(struct rcu_head *rcu)
 {
        struct allowedips_node *node, *stack[128] = {
@@ -49,7 +59,7 @@ static void root_free_rcu(struct rcu_head *rcu)
        while (len > 0 && (node = stack[--len])) {
                push_rcu(stack, node->bit[0], &len);
                push_rcu(stack, node->bit[1], &len);
-               kfree(node);
+               kmem_cache_free(node_cache, node);
        }
 }
 
@@ -66,60 +76,6 @@ static void root_remove_peer_lists(struct allowedips_node *root)
        }
 }
 
-static void walk_remove_by_peer(struct allowedips_node __rcu **top,
-                               struct wg_peer *peer, struct mutex *lock)
-{
-#define REF(p) rcu_access_pointer(p)
-#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock))
-#define PUSH(p) ({                                                             \
-               WARN_ON(IS_ENABLED(DEBUG) && len >= 128);                      \
-               stack[len++] = p;                                              \
-       })
-
-       struct allowedips_node __rcu **stack[128], **nptr;
-       struct allowedips_node *node, *prev;
-       unsigned int len;
-
-       if (unlikely(!peer || !REF(*top)))
-               return;
-
-       for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) {
-               nptr = stack[len - 1];
-               node = DEREF(nptr);
-               if (!node) {
-                       --len;
-                       continue;
-               }
-               if (!prev || REF(prev->bit[0]) == node ||
-                   REF(prev->bit[1]) == node) {
-                       if (REF(node->bit[0]))
-                               PUSH(&node->bit[0]);
-                       else if (REF(node->bit[1]))
-                               PUSH(&node->bit[1]);
-               } else if (REF(node->bit[0]) == prev) {
-                       if (REF(node->bit[1]))
-                               PUSH(&node->bit[1]);
-               } else {
-                       if (rcu_dereference_protected(node->peer,
-                               lockdep_is_held(lock)) == peer) {
-                               RCU_INIT_POINTER(node->peer, NULL);
-                               list_del_init(&node->peer_list);
-                               if (!node->bit[0] || !node->bit[1]) {
-                                       rcu_assign_pointer(*nptr, DEREF(
-                                              &node->bit[!REF(node->bit[0])]));
-                                       kfree_rcu(node, rcu);
-                                       node = DEREF(nptr);
-                               }
-                       }
-                       --len;
-               }
-       }
-
-#undef REF
-#undef DEREF
-#undef PUSH
-}
-
 static unsigned int fls128(u64 a, u64 b)
 {
        return a ? fls64(a) + 64U : fls64(b);
@@ -159,7 +115,7 @@ static struct allowedips_node *find_node(struct allowedips_node *trie, u8 bits,
                        found = node;
                if (node->cidr == bits)
                        break;
-               node = rcu_dereference_bh(CHOOSE_NODE(node, key));
+               node = rcu_dereference_bh(node->bit[choose(node, key)]);
        }
        return found;
 }
@@ -191,8 +147,7 @@ static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key,
                           u8 cidr, u8 bits, struct allowedips_node **rnode,
                           struct mutex *lock)
 {
-       struct allowedips_node *node = rcu_dereference_protected(trie,
-                                               lockdep_is_held(lock));
+       struct allowedips_node *node = rcu_dereference_protected(trie, lockdep_is_held(lock));
        struct allowedips_node *parent = NULL;
        bool exact = false;
 
@@ -202,13 +157,24 @@ static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key,
                        exact = true;
                        break;
                }
-               node = rcu_dereference_protected(CHOOSE_NODE(parent, key),
-                                                lockdep_is_held(lock));
+               node = rcu_dereference_protected(parent->bit[choose(parent, key)], lockdep_is_held(lock));
        }
        *rnode = parent;
        return exact;
 }
 
+static inline void connect_node(struct allowedips_node **parent, u8 bit, struct allowedips_node *node)
+{
+       node->parent_bit_packed = (unsigned long)parent | bit;
+       rcu_assign_pointer(*parent, node);
+}
+
+static inline void choose_and_connect_node(struct allowedips_node *parent, struct allowedips_node *node)
+{
+       u8 bit = choose(parent, node->bits);
+       connect_node(&parent->bit[bit], bit, node);
+}
+
 static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
               u8 cidr, struct wg_peer *peer, struct mutex *lock)
 {
@@ -218,13 +184,13 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
                return -EINVAL;
 
        if (!rcu_access_pointer(*trie)) {
-               node = kzalloc(sizeof(*node), GFP_KERNEL);
+               node = kmem_cache_zalloc(node_cache, GFP_KERNEL);
                if (unlikely(!node))
                        return -ENOMEM;
                RCU_INIT_POINTER(node->peer, peer);
                list_add_tail(&node->peer_list, &peer->allowedips_list);
                copy_and_assign_cidr(node, key, cidr, bits);
-               rcu_assign_pointer(*trie, node);
+               connect_node(trie, 2, node);
                return 0;
        }
        if (node_placement(*trie, key, cidr, bits, &node, lock)) {
@@ -233,7 +199,7 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
                return 0;
        }
 
-       newnode = kzalloc(sizeof(*newnode), GFP_KERNEL);
+       newnode = kmem_cache_zalloc(node_cache, GFP_KERNEL);
        if (unlikely(!newnode))
                return -ENOMEM;
        RCU_INIT_POINTER(newnode->peer, peer);
@@ -243,10 +209,10 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
        if (!node) {
                down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
        } else {
-               down = rcu_dereference_protected(CHOOSE_NODE(node, key),
-                                                lockdep_is_held(lock));
+               const u8 bit = choose(node, key);
+               down = rcu_dereference_protected(node->bit[bit], lockdep_is_held(lock));
                if (!down) {
-                       rcu_assign_pointer(CHOOSE_NODE(node, key), newnode);
+                       connect_node(&node->bit[bit], bit, newnode);
                        return 0;
                }
        }
@@ -254,30 +220,29 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
        parent = node;
 
        if (newnode->cidr == cidr) {
-               rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down);
+               choose_and_connect_node(newnode, down);
                if (!parent)
-                       rcu_assign_pointer(*trie, newnode);
+                       connect_node(trie, 2, newnode);
                else
-                       rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits),
-                                          newnode);
-       } else {
-               node = kzalloc(sizeof(*node), GFP_KERNEL);
-               if (unlikely(!node)) {
-                       list_del(&newnode->peer_list);
-                       kfree(newnode);
-                       return -ENOMEM;
-               }
-               INIT_LIST_HEAD(&node->peer_list);
-               copy_and_assign_cidr(node, newnode->bits, cidr, bits);
+                       choose_and_connect_node(parent, newnode);
+               return 0;
+       }
 
-               rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
-               rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
-               if (!parent)
-                       rcu_assign_pointer(*trie, node);
-               else
-                       rcu_assign_pointer(CHOOSE_NODE(parent, node->bits),
-                                          node);
+       node = kmem_cache_zalloc(node_cache, GFP_KERNEL);
+       if (unlikely(!node)) {
+               list_del(&newnode->peer_list);
+               kmem_cache_free(node_cache, newnode);
+               return -ENOMEM;
        }
+       INIT_LIST_HEAD(&node->peer_list);
+       copy_and_assign_cidr(node, newnode->bits, cidr, bits);
+
+       choose_and_connect_node(node, down);
+       choose_and_connect_node(node, newnode);
+       if (!parent)
+               connect_node(trie, 2, node);
+       else
+               choose_and_connect_node(parent, node);
        return 0;
 }
 
@@ -335,9 +300,41 @@ int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip,
 void wg_allowedips_remove_by_peer(struct allowedips *table,
                                  struct wg_peer *peer, struct mutex *lock)
 {
+       struct allowedips_node *node, *child, **parent_bit, *parent, *tmp;
+       bool free_parent;
+
+       if (list_empty(&peer->allowedips_list))
+               return;
        ++table->seq;
-       walk_remove_by_peer(&table->root4, peer, lock);
-       walk_remove_by_peer(&table->root6, peer, lock);
+       list_for_each_entry_safe(node, tmp, &peer->allowedips_list, peer_list) {
+               list_del_init(&node->peer_list);
+               RCU_INIT_POINTER(node->peer, NULL);
+               if (node->bit[0] && node->bit[1])
+                       continue;
+               child = rcu_dereference_protected(node->bit[!rcu_access_pointer(node->bit[0])],
+                                                 lockdep_is_held(lock));
+               if (child)
+                       child->parent_bit_packed = node->parent_bit_packed;
+               parent_bit = (struct allowedips_node **)(node->parent_bit_packed & ~3UL);
+               *parent_bit = child;
+               parent = (void *)parent_bit -
+                        offsetof(struct allowedips_node, bit[node->parent_bit_packed & 1]);
+               free_parent = !rcu_access_pointer(node->bit[0]) &&
+                             !rcu_access_pointer(node->bit[1]) &&
+                             (node->parent_bit_packed & 3) <= 1 &&
+                             !rcu_access_pointer(parent->peer);
+               if (free_parent)
+                       child = rcu_dereference_protected(
+                                       parent->bit[!(node->parent_bit_packed & 1)],
+                                       lockdep_is_held(lock));
+               call_rcu(&node->rcu, node_free_rcu);
+               if (!free_parent)
+                       continue;
+               if (child)
+                       child->parent_bit_packed = parent->parent_bit_packed;
+               *(struct allowedips_node **)(parent->parent_bit_packed & ~3UL) = child;
+               call_rcu(&parent->rcu, node_free_rcu);
+       }
 }
 
 int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr)
@@ -374,4 +371,16 @@ struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table,
        return NULL;
 }
 
+int __init wg_allowedips_slab_init(void)
+{
+       node_cache = KMEM_CACHE(allowedips_node, 0);
+       return node_cache ? 0 : -ENOMEM;
+}
+
+void wg_allowedips_slab_uninit(void)
+{
+       rcu_barrier();
+       kmem_cache_destroy(node_cache);
+}
+
 #include "selftest/allowedips.c"
index e5c83ca..2346c79 100644 (file)
@@ -15,14 +15,11 @@ struct wg_peer;
 struct allowedips_node {
        struct wg_peer __rcu *peer;
        struct allowedips_node __rcu *bit[2];
-       /* While it may seem scandalous that we waste space for v4,
-        * we're alloc'ing to the nearest power of 2 anyway, so this
-        * doesn't actually make a difference.
-        */
-       u8 bits[16] __aligned(__alignof(u64));
        u8 cidr, bit_at_a, bit_at_b, bitlen;
+       u8 bits[16] __aligned(__alignof(u64));
 
-       /* Keep rarely used list at bottom to be beyond cache line. */
+       /* Keep rarely used members at bottom to be beyond cache line. */
+       unsigned long parent_bit_packed;
        union {
                struct list_head peer_list;
                struct rcu_head rcu;
@@ -33,7 +30,7 @@ struct allowedips {
        struct allowedips_node __rcu *root4;
        struct allowedips_node __rcu *root6;
        u64 seq;
-};
+} __aligned(4); /* We pack the lower 2 bits of &root, but m68k only gives 16-bit alignment. */
 
 void wg_allowedips_init(struct allowedips *table);
 void wg_allowedips_free(struct allowedips *table, struct mutex *mutex);
@@ -56,4 +53,7 @@ struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table,
 bool wg_allowedips_selftest(void);
 #endif
 
+int wg_allowedips_slab_init(void);
+void wg_allowedips_slab_uninit(void);
+
 #endif /* _WG_ALLOWEDIPS_H */
index 7a7d5f1..75dbe77 100644 (file)
@@ -21,13 +21,22 @@ static int __init mod_init(void)
 {
        int ret;
 
+       ret = wg_allowedips_slab_init();
+       if (ret < 0)
+               goto err_allowedips;
+
 #ifdef DEBUG
+       ret = -ENOTRECOVERABLE;
        if (!wg_allowedips_selftest() || !wg_packet_counter_selftest() ||
            !wg_ratelimiter_selftest())
-               return -ENOTRECOVERABLE;
+               goto err_peer;
 #endif
        wg_noise_init();
 
+       ret = wg_peer_init();
+       if (ret < 0)
+               goto err_peer;
+
        ret = wg_device_init();
        if (ret < 0)
                goto err_device;
@@ -44,6 +53,10 @@ static int __init mod_init(void)
 err_netlink:
        wg_device_uninit();
 err_device:
+       wg_peer_uninit();
+err_peer:
+       wg_allowedips_slab_uninit();
+err_allowedips:
        return ret;
 }
 
@@ -51,6 +64,8 @@ static void __exit mod_exit(void)
 {
        wg_genetlink_uninit();
        wg_device_uninit();
+       wg_peer_uninit();
+       wg_allowedips_slab_uninit();
 }
 
 module_init(mod_init);
index cd5cb02..1acd00a 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/rcupdate.h>
 #include <linux/list.h>
 
+static struct kmem_cache *peer_cache;
 static atomic64_t peer_counter = ATOMIC64_INIT(0);
 
 struct wg_peer *wg_peer_create(struct wg_device *wg,
@@ -29,10 +30,10 @@ struct wg_peer *wg_peer_create(struct wg_device *wg,
        if (wg->num_peers >= MAX_PEERS_PER_DEVICE)
                return ERR_PTR(ret);
 
-       peer = kzalloc(sizeof(*peer), GFP_KERNEL);
+       peer = kmem_cache_zalloc(peer_cache, GFP_KERNEL);
        if (unlikely(!peer))
                return ERR_PTR(ret);
-       if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
+       if (unlikely(dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)))
                goto err;
 
        peer->device = wg;
@@ -64,7 +65,7 @@ struct wg_peer *wg_peer_create(struct wg_device *wg,
        return peer;
 
 err:
-       kfree(peer);
+       kmem_cache_free(peer_cache, peer);
        return ERR_PTR(ret);
 }
 
@@ -88,7 +89,7 @@ static void peer_make_dead(struct wg_peer *peer)
        /* Mark as dead, so that we don't allow jumping contexts after. */
        WRITE_ONCE(peer->is_dead, true);
 
-       /* The caller must now synchronize_rcu() for this to take effect. */
+       /* The caller must now synchronize_net() for this to take effect. */
 }
 
 static void peer_remove_after_dead(struct wg_peer *peer)
@@ -160,7 +161,7 @@ void wg_peer_remove(struct wg_peer *peer)
        lockdep_assert_held(&peer->device->device_update_lock);
 
        peer_make_dead(peer);
-       synchronize_rcu();
+       synchronize_net();
        peer_remove_after_dead(peer);
 }
 
@@ -178,7 +179,7 @@ void wg_peer_remove_all(struct wg_device *wg)
                peer_make_dead(peer);
                list_add_tail(&peer->peer_list, &dead_peers);
        }
-       synchronize_rcu();
+       synchronize_net();
        list_for_each_entry_safe(peer, temp, &dead_peers, peer_list)
                peer_remove_after_dead(peer);
 }
@@ -193,7 +194,8 @@ static void rcu_release(struct rcu_head *rcu)
        /* The final zeroing takes care of clearing any remaining handshake key
         * material and other potentially sensitive information.
         */
-       kfree_sensitive(peer);
+       memzero_explicit(peer, sizeof(*peer));
+       kmem_cache_free(peer_cache, peer);
 }
 
 static void kref_release(struct kref *refcount)
@@ -225,3 +227,14 @@ void wg_peer_put(struct wg_peer *peer)
                return;
        kref_put(&peer->refcount, kref_release);
 }
+
+int __init wg_peer_init(void)
+{
+       peer_cache = KMEM_CACHE(wg_peer, 0);
+       return peer_cache ? 0 : -ENOMEM;
+}
+
+void wg_peer_uninit(void)
+{
+       kmem_cache_destroy(peer_cache);
+}
index 8d53b68..76e4d31 100644 (file)
@@ -80,4 +80,7 @@ void wg_peer_put(struct wg_peer *peer);
 void wg_peer_remove(struct wg_peer *peer);
 void wg_peer_remove_all(struct wg_device *wg);
 
+int wg_peer_init(void);
+void wg_peer_uninit(void);
+
 #endif /* _WG_PEER_H */
index 846db14..e173204 100644 (file)
 
 #include <linux/siphash.h>
 
-static __init void swap_endian_and_apply_cidr(u8 *dst, const u8 *src, u8 bits,
-                                             u8 cidr)
-{
-       swap_endian(dst, src, bits);
-       memset(dst + (cidr + 7) / 8, 0, bits / 8 - (cidr + 7) / 8);
-       if (cidr)
-               dst[(cidr + 7) / 8 - 1] &= ~0U << ((8 - (cidr % 8)) % 8);
-}
-
 static __init void print_node(struct allowedips_node *node, u8 bits)
 {
        char *fmt_connection = KERN_DEBUG "\t\"%p/%d\" -> \"%p/%d\";\n";
-       char *fmt_declaration = KERN_DEBUG
-               "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n";
+       char *fmt_declaration = KERN_DEBUG "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n";
+       u8 ip1[16], ip2[16], cidr1, cidr2;
        char *style = "dotted";
-       u8 ip1[16], ip2[16];
        u32 color = 0;
 
+       if (node == NULL)
+               return;
        if (bits == 32) {
                fmt_connection = KERN_DEBUG "\t\"%pI4/%d\" -> \"%pI4/%d\";\n";
-               fmt_declaration = KERN_DEBUG
-                       "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n";
+               fmt_declaration = KERN_DEBUG "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n";
        } else if (bits == 128) {
                fmt_connection = KERN_DEBUG "\t\"%pI6/%d\" -> \"%pI6/%d\";\n";
-               fmt_declaration = KERN_DEBUG
-                       "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n";
+               fmt_declaration = KERN_DEBUG "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n";
        }
        if (node->peer) {
                hsiphash_key_t key = { { 0 } };
@@ -55,24 +45,20 @@ static __init void print_node(struct allowedips_node *node, u8 bits)
                        hsiphash_1u32(0xabad1dea, &key) % 200;
                style = "bold";
        }
-       swap_endian_and_apply_cidr(ip1, node->bits, bits, node->cidr);
-       printk(fmt_declaration, ip1, node->cidr, style, color);
+       wg_allowedips_read_node(node, ip1, &cidr1);
+       printk(fmt_declaration, ip1, cidr1, style, color);
        if (node->bit[0]) {
-               swap_endian_and_apply_cidr(ip2,
-                               rcu_dereference_raw(node->bit[0])->bits, bits,
-                               node->cidr);
-               printk(fmt_connection, ip1, node->cidr, ip2,
-                      rcu_dereference_raw(node->bit[0])->cidr);
-               print_node(rcu_dereference_raw(node->bit[0]), bits);
+               wg_allowedips_read_node(rcu_dereference_raw(node->bit[0]), ip2, &cidr2);
+               printk(fmt_connection, ip1, cidr1, ip2, cidr2);
        }
        if (node->bit[1]) {
-               swap_endian_and_apply_cidr(ip2,
-                               rcu_dereference_raw(node->bit[1])->bits,
-                               bits, node->cidr);
-               printk(fmt_connection, ip1, node->cidr, ip2,
-                      rcu_dereference_raw(node->bit[1])->cidr);
-               print_node(rcu_dereference_raw(node->bit[1]), bits);
+               wg_allowedips_read_node(rcu_dereference_raw(node->bit[1]), ip2, &cidr2);
+               printk(fmt_connection, ip1, cidr1, ip2, cidr2);
        }
+       if (node->bit[0])
+               print_node(rcu_dereference_raw(node->bit[0]), bits);
+       if (node->bit[1])
+               print_node(rcu_dereference_raw(node->bit[1]), bits);
 }
 
 static __init void print_tree(struct allowedips_node __rcu *top, u8 bits)
@@ -121,8 +107,8 @@ static __init inline union nf_inet_addr horrible_cidr_to_mask(u8 cidr)
 {
        union nf_inet_addr mask;
 
-       memset(&mask, 0x00, 128 / 8);
-       memset(&mask, 0xff, cidr / 8);
+       memset(&mask, 0, sizeof(mask));
+       memset(&mask.all, 0xff, cidr / 8);
        if (cidr % 32)
                mask.all[cidr / 32] = (__force u32)htonl(
                        (0xFFFFFFFFUL << (32 - (cidr % 32))) & 0xFFFFFFFFUL);
@@ -149,42 +135,36 @@ horrible_mask_self(struct horrible_allowedips_node *node)
 }
 
 static __init inline bool
-horrible_match_v4(const struct horrible_allowedips_node *node,
-                 struct in_addr *ip)
+horrible_match_v4(const struct horrible_allowedips_node *node, struct in_addr *ip)
 {
        return (ip->s_addr & node->mask.ip) == node->ip.ip;
 }
 
 static __init inline bool
-horrible_match_v6(const struct horrible_allowedips_node *node,
-                 struct in6_addr *ip)
+horrible_match_v6(const struct horrible_allowedips_node *node, struct in6_addr *ip)
 {
-       return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) ==
-                      node->ip.ip6[0] &&
-              (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) ==
-                      node->ip.ip6[1] &&
-              (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) ==
-                      node->ip.ip6[2] &&
+       return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) == node->ip.ip6[0] &&
+              (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) == node->ip.ip6[1] &&
+              (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) == node->ip.ip6[2] &&
               (ip->in6_u.u6_addr32[3] & node->mask.ip6[3]) == node->ip.ip6[3];
 }
 
 static __init void
-horrible_insert_ordered(struct horrible_allowedips *table,
-                       struct horrible_allowedips_node *node)
+horrible_insert_ordered(struct horrible_allowedips *table, struct horrible_allowedips_node *node)
 {
        struct horrible_allowedips_node *other = NULL, *where = NULL;
        u8 my_cidr = horrible_mask_to_cidr(node->mask);
 
        hlist_for_each_entry(other, &table->head, table) {
-               if (!memcmp(&other->mask, &node->mask,
-                           sizeof(union nf_inet_addr)) &&
-                   !memcmp(&other->ip, &node->ip,
-                           sizeof(union nf_inet_addr)) &&
-                   other->ip_version == node->ip_version) {
+               if (other->ip_version == node->ip_version &&
+                   !memcmp(&other->mask, &node->mask, sizeof(union nf_inet_addr)) &&
+                   !memcmp(&other->ip, &node->ip, sizeof(union nf_inet_addr))) {
                        other->value = node->value;
                        kfree(node);
                        return;
                }
+       }
+       hlist_for_each_entry(other, &table->head, table) {
                where = other;
                if (horrible_mask_to_cidr(other->mask) <= my_cidr)
                        break;
@@ -201,8 +181,7 @@ static __init int
 horrible_allowedips_insert_v4(struct horrible_allowedips *table,
                              struct in_addr *ip, u8 cidr, void *value)
 {
-       struct horrible_allowedips_node *node = kzalloc(sizeof(*node),
-                                                       GFP_KERNEL);
+       struct horrible_allowedips_node *node = kzalloc(sizeof(*node), GFP_KERNEL);
 
        if (unlikely(!node))
                return -ENOMEM;
@@ -219,8 +198,7 @@ static __init int
 horrible_allowedips_insert_v6(struct horrible_allowedips *table,
                              struct in6_addr *ip, u8 cidr, void *value)
 {
-       struct horrible_allowedips_node *node = kzalloc(sizeof(*node),
-                                                       GFP_KERNEL);
+       struct horrible_allowedips_node *node = kzalloc(sizeof(*node), GFP_KERNEL);
 
        if (unlikely(!node))
                return -ENOMEM;
@@ -234,39 +212,43 @@ horrible_allowedips_insert_v6(struct horrible_allowedips *table,
 }
 
 static __init void *
-horrible_allowedips_lookup_v4(struct horrible_allowedips *table,
-                             struct in_addr *ip)
+horrible_allowedips_lookup_v4(struct horrible_allowedips *table, struct in_addr *ip)
 {
        struct horrible_allowedips_node *node;
-       void *ret = NULL;
 
        hlist_for_each_entry(node, &table->head, table) {
-               if (node->ip_version != 4)
-                       continue;
-               if (horrible_match_v4(node, ip)) {
-                       ret = node->value;
-                       break;
-               }
+               if (node->ip_version == 4 && horrible_match_v4(node, ip))
+                       return node->value;
        }
-       return ret;
+       return NULL;
 }
 
 static __init void *
-horrible_allowedips_lookup_v6(struct horrible_allowedips *table,
-                             struct in6_addr *ip)
+horrible_allowedips_lookup_v6(struct horrible_allowedips *table, struct in6_addr *ip)
 {
        struct horrible_allowedips_node *node;
-       void *ret = NULL;
 
        hlist_for_each_entry(node, &table->head, table) {
-               if (node->ip_version != 6)
+               if (node->ip_version == 6 && horrible_match_v6(node, ip))
+                       return node->value;
+       }
+       return NULL;
+}
+
+
+static __init void
+horrible_allowedips_remove_by_value(struct horrible_allowedips *table, void *value)
+{
+       struct horrible_allowedips_node *node;
+       struct hlist_node *h;
+
+       hlist_for_each_entry_safe(node, h, &table->head, table) {
+               if (node->value != value)
                        continue;
-               if (horrible_match_v6(node, ip)) {
-                       ret = node->value;
-                       break;
-               }
+               hlist_del(&node->table);
+               kfree(node);
        }
-       return ret;
+
 }
 
 static __init bool randomized_test(void)
@@ -296,6 +278,7 @@ static __init bool randomized_test(void)
                        goto free;
                }
                kref_init(&peers[i]->refcount);
+               INIT_LIST_HEAD(&peers[i]->allowedips_list);
        }
 
        mutex_lock(&mutex);
@@ -333,7 +316,7 @@ static __init bool randomized_test(void)
                        if (wg_allowedips_insert_v4(&t,
                                                    (struct in_addr *)mutated,
                                                    cidr, peer, &mutex) < 0) {
-                               pr_err("allowedips random malloc: FAIL\n");
+                               pr_err("allowedips random self-test malloc: FAIL\n");
                                goto free_locked;
                        }
                        if (horrible_allowedips_insert_v4(&h,
@@ -396,23 +379,33 @@ static __init bool randomized_test(void)
                print_tree(t.root6, 128);
        }
 
-       for (i = 0; i < NUM_QUERIES; ++i) {
-               prandom_bytes(ip, 4);
-               if (lookup(t.root4, 32, ip) !=
-                   horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) {
-                       pr_err("allowedips random self-test: FAIL\n");
-                       goto free;
+       for (j = 0;; ++j) {
+               for (i = 0; i < NUM_QUERIES; ++i) {
+                       prandom_bytes(ip, 4);
+                       if (lookup(t.root4, 32, ip) != horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) {
+                               horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip);
+                               pr_err("allowedips random v4 self-test: FAIL\n");
+                               goto free;
+                       }
+                       prandom_bytes(ip, 16);
+                       if (lookup(t.root6, 128, ip) != horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) {
+                               pr_err("allowedips random v6 self-test: FAIL\n");
+                               goto free;
+                       }
                }
+               if (j >= NUM_PEERS)
+                       break;
+               mutex_lock(&mutex);
+               wg_allowedips_remove_by_peer(&t, peers[j], &mutex);
+               mutex_unlock(&mutex);
+               horrible_allowedips_remove_by_value(&h, peers[j]);
        }
 
-       for (i = 0; i < NUM_QUERIES; ++i) {
-               prandom_bytes(ip, 16);
-               if (lookup(t.root6, 128, ip) !=
-                   horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) {
-                       pr_err("allowedips random self-test: FAIL\n");
-                       goto free;
-               }
+       if (t.root4 || t.root6) {
+               pr_err("allowedips random self-test removal: FAIL\n");
+               goto free;
        }
+
        ret = true;
 
 free:
index d9ad850..8c496b7 100644 (file)
@@ -430,7 +430,7 @@ void wg_socket_reinit(struct wg_device *wg, struct sock *new4,
        if (new4)
                wg->incoming_port = ntohs(inet_sk(new4)->inet_sport);
        mutex_unlock(&wg->socket_update_lock);
-       synchronize_rcu();
+       synchronize_net();
        sock_free(old4);
        sock_free(old6);
 }
index 977acab..03fe628 100644 (file)
@@ -514,10 +514,36 @@ EXPORT_SYMBOL_GPL(mt76_free_device);
 static void mt76_rx_release_amsdu(struct mt76_phy *phy, enum mt76_rxq_id q)
 {
        struct sk_buff *skb = phy->rx_amsdu[q].head;
+       struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb;
        struct mt76_dev *dev = phy->dev;
 
        phy->rx_amsdu[q].head = NULL;
        phy->rx_amsdu[q].tail = NULL;
+
+       /*
+        * Validate if the amsdu has a proper first subframe.
+        * A single MSDU can be parsed as A-MSDU when the unauthenticated A-MSDU
+        * flag of the QoS header gets flipped. In such cases, the first
+        * subframe has a LLC/SNAP header in the location of the destination
+        * address.
+        */
+       if (skb_shinfo(skb)->frag_list) {
+               int offset = 0;
+
+               if (!(status->flag & RX_FLAG_8023)) {
+                       offset = ieee80211_get_hdrlen_from_skb(skb);
+
+                       if ((status->flag &
+                            (RX_FLAG_DECRYPTED | RX_FLAG_IV_STRIPPED)) ==
+                           RX_FLAG_DECRYPTED)
+                               offset += 8;
+               }
+
+               if (ether_addr_equal(skb->data + offset, rfc1042_header)) {
+                       dev_kfree_skb(skb);
+                       return;
+               }
+       }
        __skb_queue_tail(&dev->rx_skb[q], skb);
 }
 
index 86341d1..d20f05a 100644 (file)
@@ -510,7 +510,6 @@ void mt7615_init_device(struct mt7615_dev *dev)
        mutex_init(&dev->pm.mutex);
        init_waitqueue_head(&dev->pm.wait);
        spin_lock_init(&dev->pm.txq_lock);
-       set_bit(MT76_STATE_PM, &dev->mphy.state);
        INIT_DELAYED_WORK(&dev->mphy.mac_work, mt7615_mac_work);
        INIT_DELAYED_WORK(&dev->phy.scan_work, mt7615_scan_work);
        INIT_DELAYED_WORK(&dev->coredump.work, mt7615_coredump_work);
index f81a17d..e2dcfee 100644 (file)
@@ -1912,8 +1912,9 @@ void mt7615_pm_wake_work(struct work_struct *work)
                        napi_schedule(&dev->mt76.napi[i]);
                mt76_connac_pm_dequeue_skbs(mphy, &dev->pm);
                mt76_queue_tx_cleanup(dev, dev->mt76.q_mcu[MT_MCUQ_WM], false);
-               ieee80211_queue_delayed_work(mphy->hw, &mphy->mac_work,
-                                            MT7615_WATCHDOG_TIME);
+               if (test_bit(MT76_STATE_RUNNING, &mphy->state))
+                       ieee80211_queue_delayed_work(mphy->hw, &mphy->mac_work,
+                                                    MT7615_WATCHDOG_TIME);
        }
 
        ieee80211_wake_queues(mphy->hw);
index 17fe418..d1be78b 100644 (file)
@@ -51,16 +51,13 @@ mt7663s_mcu_send_message(struct mt76_dev *mdev, struct sk_buff *skb,
        return ret;
 }
 
-static int mt7663s_mcu_drv_pmctrl(struct mt7615_dev *dev)
+static int __mt7663s_mcu_drv_pmctrl(struct mt7615_dev *dev)
 {
        struct sdio_func *func = dev->mt76.sdio.func;
        struct mt76_phy *mphy = &dev->mt76.phy;
        u32 status;
        int ret;
 
-       if (!test_and_clear_bit(MT76_STATE_PM, &mphy->state))
-               goto out;
-
        sdio_claim_host(func);
 
        sdio_writel(func, WHLPCR_FW_OWN_REQ_CLR, MCR_WHLPCR, NULL);
@@ -76,13 +73,21 @@ static int mt7663s_mcu_drv_pmctrl(struct mt7615_dev *dev)
        }
 
        sdio_release_host(func);
-
-out:
        dev->pm.last_activity = jiffies;
 
        return 0;
 }
 
+static int mt7663s_mcu_drv_pmctrl(struct mt7615_dev *dev)
+{
+       struct mt76_phy *mphy = &dev->mt76.phy;
+
+       if (test_and_clear_bit(MT76_STATE_PM, &mphy->state))
+               return __mt7663s_mcu_drv_pmctrl(dev);
+
+       return 0;
+}
+
 static int mt7663s_mcu_fw_pmctrl(struct mt7615_dev *dev)
 {
        struct sdio_func *func = dev->mt76.sdio.func;
@@ -123,7 +128,7 @@ int mt7663s_mcu_init(struct mt7615_dev *dev)
        struct mt7615_mcu_ops *mcu_ops;
        int ret;
 
-       ret = mt7663s_mcu_drv_pmctrl(dev);
+       ret = __mt7663s_mcu_drv_pmctrl(dev);
        if (ret)
                return ret;
 
index c55698f..028ff43 100644 (file)
@@ -55,10 +55,7 @@ int mt7663u_mcu_init(struct mt7615_dev *dev)
 
        dev->mt76.mcu_ops = &mt7663u_mcu_ops,
 
-       /* usb does not support runtime-pm */
-       clear_bit(MT76_STATE_PM, &dev->mphy.state);
        mt76_set(dev, MT_UDMA_TX_QSEL, MT_FW_DL_EN);
-
        if (test_and_clear_bit(MT76_STATE_POWER_OFF, &dev->mphy.state)) {
                mt7615_mcu_restart(&dev->mt76);
                if (!mt76_poll_msec(dev, MT_CONN_ON_MISC,
index fe0ab5e..6195616 100644 (file)
@@ -721,6 +721,10 @@ void mt76_connac_mcu_sta_tlv(struct mt76_phy *mphy, struct sk_buff *skb,
        phy->phy_type = mt76_connac_get_phy_mode_v2(mphy, vif, band, sta);
        phy->basic_rate = cpu_to_le16((u16)vif->bss_conf.basic_rates);
        phy->rcpi = rcpi;
+       phy->ampdu = FIELD_PREP(IEEE80211_HT_AMPDU_PARM_FACTOR,
+                               sta->ht_cap.ampdu_factor) |
+                    FIELD_PREP(IEEE80211_HT_AMPDU_PARM_DENSITY,
+                               sta->ht_cap.ampdu_density);
 
        tlv = mt76_connac_mcu_add_tlv(skb, STA_REC_RA, sizeof(*ra_info));
        ra_info = (struct sta_rec_ra_info *)tlv;
index 5847f94..b795e72 100644 (file)
@@ -87,7 +87,7 @@ static const struct ieee80211_ops mt76x0e_ops = {
        .reconfig_complete = mt76x02_reconfig_complete,
 };
 
-static int mt76x0e_register_device(struct mt76x02_dev *dev)
+static int mt76x0e_init_hardware(struct mt76x02_dev *dev, bool resume)
 {
        int err;
 
@@ -100,9 +100,11 @@ static int mt76x0e_register_device(struct mt76x02_dev *dev)
        if (err < 0)
                return err;
 
-       err = mt76x02_dma_init(dev);
-       if (err < 0)
-               return err;
+       if (!resume) {
+               err = mt76x02_dma_init(dev);
+               if (err < 0)
+                       return err;
+       }
 
        err = mt76x0_init_hardware(dev);
        if (err < 0)
@@ -123,6 +125,17 @@ static int mt76x0e_register_device(struct mt76x02_dev *dev)
        mt76_clear(dev, 0x110, BIT(9));
        mt76_set(dev, MT_MAX_LEN_CFG, BIT(13));
 
+       return 0;
+}
+
+static int mt76x0e_register_device(struct mt76x02_dev *dev)
+{
+       int err;
+
+       err = mt76x0e_init_hardware(dev, false);
+       if (err < 0)
+               return err;
+
        err = mt76x0_register_device(dev);
        if (err < 0)
                return err;
@@ -167,6 +180,8 @@ mt76x0e_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        if (ret)
                return ret;
 
+       mt76_pci_disable_aspm(pdev);
+
        mdev = mt76_alloc_device(&pdev->dev, sizeof(*dev), &mt76x0e_ops,
                                 &drv_ops);
        if (!mdev)
@@ -220,6 +235,60 @@ mt76x0e_remove(struct pci_dev *pdev)
        mt76_free_device(mdev);
 }
 
+#ifdef CONFIG_PM
+static int mt76x0e_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+       struct mt76_dev *mdev = pci_get_drvdata(pdev);
+       struct mt76x02_dev *dev = container_of(mdev, struct mt76x02_dev, mt76);
+       int i;
+
+       mt76_worker_disable(&mdev->tx_worker);
+       for (i = 0; i < ARRAY_SIZE(mdev->phy.q_tx); i++)
+               mt76_queue_tx_cleanup(dev, mdev->phy.q_tx[i], true);
+       for (i = 0; i < ARRAY_SIZE(mdev->q_mcu); i++)
+               mt76_queue_tx_cleanup(dev, mdev->q_mcu[i], true);
+       napi_disable(&mdev->tx_napi);
+
+       mt76_for_each_q_rx(mdev, i)
+               napi_disable(&mdev->napi[i]);
+
+       mt76x02_dma_disable(dev);
+       mt76x02_mcu_cleanup(dev);
+       mt76x0_chip_onoff(dev, false, false);
+
+       pci_enable_wake(pdev, pci_choose_state(pdev, state), true);
+       pci_save_state(pdev);
+
+       return pci_set_power_state(pdev, pci_choose_state(pdev, state));
+}
+
+static int mt76x0e_resume(struct pci_dev *pdev)
+{
+       struct mt76_dev *mdev = pci_get_drvdata(pdev);
+       struct mt76x02_dev *dev = container_of(mdev, struct mt76x02_dev, mt76);
+       int err, i;
+
+       err = pci_set_power_state(pdev, PCI_D0);
+       if (err)
+               return err;
+
+       pci_restore_state(pdev);
+
+       mt76_worker_enable(&mdev->tx_worker);
+
+       mt76_for_each_q_rx(mdev, i) {
+               mt76_queue_rx_reset(dev, i);
+               napi_enable(&mdev->napi[i]);
+               napi_schedule(&mdev->napi[i]);
+       }
+
+       napi_enable(&mdev->tx_napi);
+       napi_schedule(&mdev->tx_napi);
+
+       return mt76x0e_init_hardware(dev, true);
+}
+#endif /* CONFIG_PM */
+
 static const struct pci_device_id mt76x0e_device_table[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x7610) },
        { PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x7630) },
@@ -237,6 +306,10 @@ static struct pci_driver mt76x0e_driver = {
        .id_table       = mt76x0e_device_table,
        .probe          = mt76x0e_probe,
        .remove         = mt76x0e_remove,
+#ifdef CONFIG_PM
+       .suspend        = mt76x0e_suspend,
+       .resume         = mt76x0e_resume,
+#endif /* CONFIG_PM */
 };
 
 module_pci_driver(mt76x0e_driver);
index fe28bf4..1763ea0 100644 (file)
@@ -76,8 +76,8 @@ mt7921_init_wiphy(struct ieee80211_hw *hw)
        struct wiphy *wiphy = hw->wiphy;
 
        hw->queues = 4;
-       hw->max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
-       hw->max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
+       hw->max_rx_aggregation_subframes = 64;
+       hw->max_tx_aggregation_subframes = 128;
 
        hw->radiotap_timestamp.units_pos =
                IEEE80211_RADIOTAP_TIMESTAMP_UNIT_US;
index 214bd18..decf2d5 100644 (file)
@@ -1404,8 +1404,9 @@ void mt7921_pm_wake_work(struct work_struct *work)
                        napi_schedule(&dev->mt76.napi[i]);
                mt76_connac_pm_dequeue_skbs(mphy, &dev->pm);
                mt7921_tx_cleanup(dev);
-               ieee80211_queue_delayed_work(mphy->hw, &mphy->mac_work,
-                                            MT7921_WATCHDOG_TIME);
+               if (test_bit(MT76_STATE_RUNNING, &mphy->state))
+                       ieee80211_queue_delayed_work(mphy->hw, &mphy->mac_work,
+                                                    MT7921_WATCHDOG_TIME);
        }
 
        ieee80211_wake_queues(mphy->hw);
index f4c27aa..97a0ef3 100644 (file)
@@ -74,8 +74,7 @@ mt7921_init_he_caps(struct mt7921_phy *phy, enum nl80211_band band,
                                IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G;
                else if (band == NL80211_BAND_5GHZ)
                        he_cap_elem->phy_cap_info[0] =
-                               IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G |
-                               IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G;
+                               IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G;
 
                he_cap_elem->phy_cap_info[1] =
                        IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD;
index 5f3d56d..67dc4b4 100644 (file)
@@ -402,20 +402,22 @@ static void
 mt7921_mcu_tx_rate_report(struct mt7921_dev *dev, struct sk_buff *skb,
                          u16 wlan_idx)
 {
-       struct mt7921_mcu_wlan_info_event *wtbl_info =
-               (struct mt7921_mcu_wlan_info_event *)(skb->data);
-       struct rate_info rate = {};
-       u8 curr_idx = wtbl_info->rate_info.rate_idx;
-       u16 curr = le16_to_cpu(wtbl_info->rate_info.rate[curr_idx]);
-       struct mt7921_mcu_peer_cap peer = wtbl_info->peer_cap;
+       struct mt7921_mcu_wlan_info_event *wtbl_info;
        struct mt76_phy *mphy = &dev->mphy;
        struct mt7921_sta_stats *stats;
+       struct rate_info rate = {};
        struct mt7921_sta *msta;
        struct mt76_wcid *wcid;
+       u8 idx;
 
        if (wlan_idx >= MT76_N_WCIDS)
                return;
 
+       wtbl_info = (struct mt7921_mcu_wlan_info_event *)skb->data;
+       idx = wtbl_info->rate_info.rate_idx;
+       if (idx >= ARRAY_SIZE(wtbl_info->rate_info.rate))
+               return;
+
        rcu_read_lock();
 
        wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]);
@@ -426,7 +428,8 @@ mt7921_mcu_tx_rate_report(struct mt7921_dev *dev, struct sk_buff *skb,
        stats = &msta->stats;
 
        /* current rate */
-       mt7921_mcu_tx_rate_parse(mphy, &peer, &rate, curr);
+       mt7921_mcu_tx_rate_parse(mphy, &wtbl_info->peer_cap, &rate,
+                                le16_to_cpu(wtbl_info->rate_info.rate[idx]));
        stats->tx_rate = rate;
 out:
        rcu_read_unlock();
index 565deea..8612f8f 100644 (file)
@@ -830,6 +830,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_proto_hdr);
 
 struct virtchnl_proto_hdrs {
        u8 tunnel_level;
+       u8 pad[3];
        /**
         * specify where protocol header start from.
         * 0 - from the outer layer
index 6d16eed..eb86e80 100644 (file)
@@ -1289,6 +1289,8 @@ enum mlx5_fc_bulk_alloc_bitmask {
 
 #define MLX5_FC_BULK_NUM_FCS(fc_enum) (MLX5_FC_BULK_SIZE_FACTOR * (fc_enum))
 
+#define MLX5_FT_MAX_MULTIPATH_LEVEL 63
+
 enum {
        MLX5_STEERING_FORMAT_CONNECTX_5   = 0,
        MLX5_STEERING_FORMAT_CONNECTX_6DX = 1,
index 48ecca8..b655d86 100644 (file)
@@ -119,7 +119,7 @@ void caif_free_client(struct cflayer *adap_layer);
  * The link_support layer is used to add any Link Layer specific
  * framing.
  */
-void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
+int caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
                        struct cflayer *link_support, int head_room,
                        struct cflayer **layer, int (**rcv_func)(
                                struct sk_buff *, struct net_device *,
index 2aa5e91..8819ff4 100644 (file)
@@ -62,7 +62,7 @@ void cfcnfg_remove(struct cfcnfg *cfg);
  * @fcs:       Specify if checksum is used in CAIF Framing Layer.
  * @head_room: Head space needed by link specific protocol.
  */
-void
+int
 cfcnfg_add_phy_layer(struct cfcnfg *cnfg,
                     struct net_device *dev, struct cflayer *phy_layer,
                     enum cfcnfg_phy_preference pref,
index 14a55e0..67cce87 100644 (file)
@@ -9,4 +9,5 @@
 #include <net/caif/caif_layer.h>
 
 struct cflayer *cfserl_create(int instance, bool use_stx);
+void cfserl_release(struct cflayer *layer);
 #endif
index 27eeb61..0a5655e 100644 (file)
@@ -1506,16 +1506,10 @@ struct nft_trans_chain {
 
 struct nft_trans_table {
        bool                            update;
-       u8                              state;
-       u32                             flags;
 };
 
 #define nft_trans_table_update(trans)  \
        (((struct nft_trans_table *)trans->data)->update)
-#define nft_trans_table_state(trans)   \
-       (((struct nft_trans_table *)trans->data)->state)
-#define nft_trans_table_flags(trans)   \
-       (((struct nft_trans_table *)trans->data)->flags)
 
 struct nft_trans_elem {
        struct nft_set                  *set;
index 3eccb52..8341a8d 100644 (file)
@@ -193,7 +193,11 @@ struct tls_offload_context_tx {
        (sizeof(struct tls_offload_context_tx) + TLS_DRIVER_STATE_SIZE_TX)
 
 enum tls_context_flags {
-       TLS_RX_SYNC_RUNNING = 0,
+       /* tls_device_down was called after the netdev went down, device state
+        * was released, and kTLS works in software, even though rx_conf is
+        * still TLS_HW (needed for transition).
+        */
+       TLS_RX_DEV_DEGRADED = 0,
        /* Unlike RX where resync is driven entirely by the core in TX only
         * the driver knows when things went out of sync, so we need the flag
         * to be atomic.
@@ -266,6 +270,7 @@ struct tls_context {
 
        /* cache cold stuff */
        struct proto *sk_proto;
+       struct sock *sk;
 
        void (*sk_destruct)(struct sock *sk);
 
@@ -448,6 +453,9 @@ static inline u16 tls_user_config(struct tls_context *ctx, bool tx)
 struct sk_buff *
 tls_validate_xmit_skb(struct sock *sk, struct net_device *dev,
                      struct sk_buff *skb);
+struct sk_buff *
+tls_validate_xmit_skb_sw(struct sock *sk, struct net_device *dev,
+                        struct sk_buff *skb);
 
 static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk)
 {
index f0c35ce..4fe842c 100644 (file)
@@ -54,7 +54,7 @@
 #define VIRTIO_ID_SOUND                        25 /* virtio sound */
 #define VIRTIO_ID_FS                   26 /* virtio filesystem */
 #define VIRTIO_ID_PMEM                 27 /* virtio pmem */
-#define VIRTIO_ID_BT                   28 /* virtio bluetooth */
 #define VIRTIO_ID_MAC80211_HWSIM       29 /* virtio mac80211-hwsim */
+#define VIRTIO_ID_BT                   40 /* virtio bluetooth */
 
 #endif /* _LINUX_VIRTIO_IDS_H */
index 7344349..a2f1f15 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/jiffies.h>
 #include <linux/pid_namespace.h>
 #include <linux/proc_ns.h>
+#include <linux/security.h>
 
 #include "../../lib/kstrtox.h"
 
@@ -1069,11 +1070,13 @@ bpf_base_func_proto(enum bpf_func_id func_id)
        case BPF_FUNC_probe_read_user:
                return &bpf_probe_read_user_proto;
        case BPF_FUNC_probe_read_kernel:
-               return &bpf_probe_read_kernel_proto;
+               return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
+                      NULL : &bpf_probe_read_kernel_proto;
        case BPF_FUNC_probe_read_user_str:
                return &bpf_probe_read_user_str_proto;
        case BPF_FUNC_probe_read_kernel_str:
-               return &bpf_probe_read_kernel_str_proto;
+               return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
+                      NULL : &bpf_probe_read_kernel_str_proto;
        case BPF_FUNC_snprintf_btf:
                return &bpf_snprintf_btf_proto;
        case BPF_FUNC_snprintf:
index d2d7cf6..7a52bc1 100644 (file)
@@ -215,16 +215,11 @@ const struct bpf_func_proto bpf_probe_read_user_str_proto = {
 static __always_inline int
 bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
 {
-       int ret = security_locked_down(LOCKDOWN_BPF_READ);
+       int ret;
 
-       if (unlikely(ret < 0))
-               goto fail;
        ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
        if (unlikely(ret < 0))
-               goto fail;
-       return ret;
-fail:
-       memset(dst, 0, size);
+               memset(dst, 0, size);
        return ret;
 }
 
@@ -246,10 +241,7 @@ const struct bpf_func_proto bpf_probe_read_kernel_proto = {
 static __always_inline int
 bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
 {
-       int ret = security_locked_down(LOCKDOWN_BPF_READ);
-
-       if (unlikely(ret < 0))
-               goto fail;
+       int ret;
 
        /*
         * The strncpy_from_kernel_nofault() call will likely not fill the
@@ -262,11 +254,7 @@ bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
         */
        ret = strncpy_from_kernel_nofault(dst, unsafe_ptr, size);
        if (unlikely(ret < 0))
-               goto fail;
-
-       return ret;
-fail:
-       memset(dst, 0, size);
+               memset(dst, 0, size);
        return ret;
 }
 
@@ -1011,16 +999,20 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
        case BPF_FUNC_probe_read_user:
                return &bpf_probe_read_user_proto;
        case BPF_FUNC_probe_read_kernel:
-               return &bpf_probe_read_kernel_proto;
+               return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
+                      NULL : &bpf_probe_read_kernel_proto;
        case BPF_FUNC_probe_read_user_str:
                return &bpf_probe_read_user_str_proto;
        case BPF_FUNC_probe_read_kernel_str:
-               return &bpf_probe_read_kernel_str_proto;
+               return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
+                      NULL : &bpf_probe_read_kernel_str_proto;
 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
        case BPF_FUNC_probe_read:
-               return &bpf_probe_read_compat_proto;
+               return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
+                      NULL : &bpf_probe_read_compat_proto;
        case BPF_FUNC_probe_read_str:
-               return &bpf_probe_read_compat_str_proto;
+               return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
+                      NULL : &bpf_probe_read_compat_str_proto;
 #endif
 #ifdef CONFIG_CGROUPS
        case BPF_FUNC_get_current_cgroup_id:
index fd12f16..7d71d10 100644 (file)
@@ -1610,8 +1610,13 @@ setup_failed:
        } else {
                /* Init failed, cleanup */
                flush_work(&hdev->tx_work);
-               flush_work(&hdev->cmd_work);
+
+               /* Since hci_rx_work() is possible to awake new cmd_work
+                * it should be flushed first to avoid unexpected call of
+                * hci_cmd_work()
+                */
                flush_work(&hdev->rx_work);
+               flush_work(&hdev->cmd_work);
 
                skb_queue_purge(&hdev->cmd_q);
                skb_queue_purge(&hdev->rx_q);
index 251b912..eed0dd0 100644 (file)
@@ -762,7 +762,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event)
                /* Detach sockets from device */
                read_lock(&hci_sk_list.lock);
                sk_for_each(sk, &hci_sk_list.head) {
-                       bh_lock_sock_nested(sk);
+                       lock_sock(sk);
                        if (hci_pi(sk)->hdev == hdev) {
                                hci_pi(sk)->hdev = NULL;
                                sk->sk_err = EPIPE;
@@ -771,7 +771,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event)
 
                                hci_dev_put(hdev);
                        }
-                       bh_unlock_sock(sk);
+                       release_sock(sk);
                }
                read_unlock(&hci_sk_list.lock);
        }
index c10e5a5..4401397 100644 (file)
@@ -308,7 +308,7 @@ static void dev_flowctrl(struct net_device *dev, int on)
        caifd_put(caifd);
 }
 
-void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
+int caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
                     struct cflayer *link_support, int head_room,
                     struct cflayer **layer,
                     int (**rcv_func)(struct sk_buff *, struct net_device *,
@@ -319,11 +319,12 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
        enum cfcnfg_phy_preference pref;
        struct cfcnfg *cfg = get_cfcnfg(dev_net(dev));
        struct caif_device_entry_list *caifdevs;
+       int res;
 
        caifdevs = caif_device_list(dev_net(dev));
        caifd = caif_device_alloc(dev);
        if (!caifd)
-               return;
+               return -ENOMEM;
        *layer = &caifd->layer;
        spin_lock_init(&caifd->flow_lock);
 
@@ -344,7 +345,7 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
        strlcpy(caifd->layer.name, dev->name,
                sizeof(caifd->layer.name));
        caifd->layer.transmit = transmit;
-       cfcnfg_add_phy_layer(cfg,
+       res = cfcnfg_add_phy_layer(cfg,
                                dev,
                                &caifd->layer,
                                pref,
@@ -354,6 +355,7 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
        mutex_unlock(&caifdevs->lock);
        if (rcv_func)
                *rcv_func = receive;
+       return res;
 }
 EXPORT_SYMBOL(caif_enroll_dev);
 
@@ -368,6 +370,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
        struct cflayer *layer, *link_support;
        int head_room = 0;
        struct caif_device_entry_list *caifdevs;
+       int res;
 
        cfg = get_cfcnfg(dev_net(dev));
        caifdevs = caif_device_list(dev_net(dev));
@@ -393,8 +396,10 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
                                break;
                        }
                }
-               caif_enroll_dev(dev, caifdev, link_support, head_room,
+               res = caif_enroll_dev(dev, caifdev, link_support, head_room,
                                &layer, NULL);
+               if (res)
+                       cfserl_release(link_support);
                caifdev->flowctrl = dev_flowctrl;
                break;
 
index a0116b9..b02e129 100644 (file)
@@ -115,6 +115,11 @@ static struct cflayer *cfusbl_create(int phyid, u8 ethaddr[ETH_ALEN],
        return (struct cflayer *) this;
 }
 
+static void cfusbl_release(struct cflayer *layer)
+{
+       kfree(layer);
+}
+
 static struct packet_type caif_usb_type __read_mostly = {
        .type = cpu_to_be16(ETH_P_802_EX1),
 };
@@ -127,6 +132,7 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
        struct cflayer *layer, *link_support;
        struct usbnet *usbnet;
        struct usb_device *usbdev;
+       int res;
 
        /* Check whether we have a NCM device, and find its VID/PID. */
        if (!(dev->dev.parent && dev->dev.parent->driver &&
@@ -169,8 +175,11 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
        if (dev->num_tx_queues > 1)
                pr_warn("USB device uses more than one tx queue\n");
 
-       caif_enroll_dev(dev, &common, link_support, CFUSB_MAX_HEADLEN,
+       res = caif_enroll_dev(dev, &common, link_support, CFUSB_MAX_HEADLEN,
                        &layer, &caif_usb_type.func);
+       if (res)
+               goto err;
+
        if (!pack_added)
                dev_add_pack(&caif_usb_type);
        pack_added = true;
@@ -178,6 +187,9 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
        strlcpy(layer->name, dev->name, sizeof(layer->name));
 
        return 0;
+err:
+       cfusbl_release(link_support);
+       return res;
 }
 
 static struct notifier_block caif_device_notifier = {
index 399239a..cac30e6 100644 (file)
@@ -450,7 +450,7 @@ unlock:
        rcu_read_unlock();
 }
 
-void
+int
 cfcnfg_add_phy_layer(struct cfcnfg *cnfg,
                     struct net_device *dev, struct cflayer *phy_layer,
                     enum cfcnfg_phy_preference pref,
@@ -459,7 +459,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg,
 {
        struct cflayer *frml;
        struct cfcnfg_phyinfo *phyinfo = NULL;
-       int i;
+       int i, res = 0;
        u8 phyid;
 
        mutex_lock(&cnfg->lock);
@@ -473,12 +473,15 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg,
                        goto got_phyid;
        }
        pr_warn("Too many CAIF Link Layers (max 6)\n");
+       res = -EEXIST;
        goto out;
 
 got_phyid:
        phyinfo = kzalloc(sizeof(struct cfcnfg_phyinfo), GFP_ATOMIC);
-       if (!phyinfo)
+       if (!phyinfo) {
+               res = -ENOMEM;
                goto out_err;
+       }
 
        phy_layer->id = phyid;
        phyinfo->pref = pref;
@@ -492,8 +495,10 @@ got_phyid:
 
        frml = cffrml_create(phyid, fcs);
 
-       if (!frml)
+       if (!frml) {
+               res = -ENOMEM;
                goto out_err;
+       }
        phyinfo->frm_layer = frml;
        layer_set_up(frml, cnfg->mux);
 
@@ -511,11 +516,12 @@ got_phyid:
        list_add_rcu(&phyinfo->node, &cnfg->phys);
 out:
        mutex_unlock(&cnfg->lock);
-       return;
+       return res;
 
 out_err:
        kfree(phyinfo);
        mutex_unlock(&cnfg->lock);
+       return res;
 }
 EXPORT_SYMBOL(cfcnfg_add_phy_layer);
 
index e11725a..40cd57a 100644 (file)
@@ -31,6 +31,11 @@ static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt);
 static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
                           int phyid);
 
+void cfserl_release(struct cflayer *layer)
+{
+       kfree(layer);
+}
+
 struct cflayer *cfserl_create(int instance, bool use_stx)
 {
        struct cfserl *this = kzalloc(sizeof(struct cfserl), GFP_ATOMIC);
index ddd15af..210fc3b 100644 (file)
@@ -177,7 +177,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
        if (kcmlen > stackbuf_size)
                kcmsg_base = kcmsg = sock_kmalloc(sk, kcmlen, GFP_KERNEL);
        if (kcmsg == NULL)
-               return -ENOBUFS;
+               return -ENOMEM;
 
        /* Now copy them over neatly. */
        memset(kcmsg, 0, kcmlen);
index 4eb9695..051432e 100644 (file)
@@ -705,7 +705,6 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
        case DEVLINK_PORT_FLAVOUR_PHYSICAL:
        case DEVLINK_PORT_FLAVOUR_CPU:
        case DEVLINK_PORT_FLAVOUR_DSA:
-       case DEVLINK_PORT_FLAVOUR_VIRTUAL:
                if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER,
                                attrs->phys.port_number))
                        return -EMSGSIZE;
@@ -8631,7 +8630,6 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
 
        switch (attrs->flavour) {
        case DEVLINK_PORT_FLAVOUR_PHYSICAL:
-       case DEVLINK_PORT_FLAVOUR_VIRTUAL:
                if (!attrs->split)
                        n = snprintf(name, len, "p%u", attrs->phys.port_number);
                else
@@ -8679,6 +8677,8 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
                n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf,
                             attrs->pci_sf.sf);
                break;
+       case DEVLINK_PORT_FLAVOUR_VIRTUAL:
+               return -EOPNOTSUPP;
        }
 
        if (n >= len)
index cd80ffe..a9f9379 100644 (file)
@@ -1168,7 +1168,7 @@ static void notify_rule_change(int event, struct fib_rule *rule,
 {
        struct net *net;
        struct sk_buff *skb;
-       int err = -ENOBUFS;
+       int err = -ENOMEM;
 
        net = ops->fro_net;
        skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
index 714d5fa..3e84279 100644 (file)
@@ -4842,8 +4842,10 @@ static int rtnl_bridge_notify(struct net_device *dev)
        if (err < 0)
                goto errout;
 
-       if (!skb->len)
+       if (!skb->len) {
+               err = -EINVAL;
                goto errout;
+       }
 
        rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
        return 0;
index 958614e..946888a 100644 (file)
@@ -815,10 +815,18 @@ void sock_set_rcvbuf(struct sock *sk, int val)
 }
 EXPORT_SYMBOL(sock_set_rcvbuf);
 
+static void __sock_set_mark(struct sock *sk, u32 val)
+{
+       if (val != sk->sk_mark) {
+               sk->sk_mark = val;
+               sk_dst_reset(sk);
+       }
+}
+
 void sock_set_mark(struct sock *sk, u32 val)
 {
        lock_sock(sk);
-       sk->sk_mark = val;
+       __sock_set_mark(sk, val);
        release_sock(sk);
 }
 EXPORT_SYMBOL(sock_set_mark);
@@ -1126,10 +1134,10 @@ set_sndbuf:
        case SO_MARK:
                if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
                        ret = -EPERM;
-               } else if (val != sk->sk_mark) {
-                       sk->sk_mark = val;
-                       sk_dst_reset(sk);
+                       break;
                }
+
+               __sock_set_mark(sk, val);
                break;
 
        case SO_RXQ_OVFL:
index 008c1ec..122ad58 100644 (file)
@@ -64,7 +64,7 @@
 #define DSA_8021Q_SUBVLAN_HI_SHIFT     9
 #define DSA_8021Q_SUBVLAN_HI_MASK      GENMASK(9, 9)
 #define DSA_8021Q_SUBVLAN_LO_SHIFT     4
-#define DSA_8021Q_SUBVLAN_LO_MASK      GENMASK(4, 3)
+#define DSA_8021Q_SUBVLAN_LO_MASK      GENMASK(5, 4)
 #define DSA_8021Q_SUBVLAN_HI(x)                (((x) & GENMASK(2, 2)) >> 2)
 #define DSA_8021Q_SUBVLAN_LO(x)                ((x) & GENMASK(1, 0))
 #define DSA_8021Q_SUBVLAN(x)           \
index 0c1b077..29bf976 100644 (file)
@@ -680,8 +680,10 @@ int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info)
            nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVEL, params.out_level) ||
            nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER,
                        be32_to_cpu(params.frame_counter)) ||
-           ieee802154_llsec_fill_key_id(msg, &params.out_key))
+           ieee802154_llsec_fill_key_id(msg, &params.out_key)) {
+               rc = -ENOBUFS;
                goto out_free;
+       }
 
        dev_put(dev);
 
@@ -1184,7 +1186,7 @@ static int llsec_iter_devkeys(struct llsec_dump_data *data)
 {
        struct ieee802154_llsec_device *dpos;
        struct ieee802154_llsec_device_key *kpos;
-       int rc = 0, idx = 0, idx2;
+       int idx = 0, idx2;
 
        list_for_each_entry(dpos, &data->table->devices, list) {
                if (idx++ < data->s_idx)
@@ -1200,7 +1202,7 @@ static int llsec_iter_devkeys(struct llsec_dump_data *data)
                                                      data->nlmsg_seq,
                                                      dpos->hwaddr, kpos,
                                                      data->dev)) {
-                               return rc = -EMSGSIZE;
+                               return -EMSGSIZE;
                        }
 
                        data->s_idx2++;
@@ -1209,7 +1211,7 @@ static int llsec_iter_devkeys(struct llsec_dump_data *data)
                data->s_idx++;
        }
 
-       return rc;
+       return 0;
 }
 
 int ieee802154_llsec_dump_devkeys(struct sk_buff *skb,
index 2cdc7e6..88215b5 100644 (file)
@@ -241,8 +241,10 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info)
        }
 
        if (nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) ||
-           nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name))
+           nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name)) {
+               rc = -EMSGSIZE;
                goto nla_put_failure;
+       }
        dev_put(dev);
 
        wpan_phy_put(phy);
index 05f6bd8..0cf2374 100644 (file)
@@ -1298,19 +1298,20 @@ ieee802154_llsec_parse_dev_addr(struct nlattr *nla,
        if (!nla || nla_parse_nested_deprecated(attrs, NL802154_DEV_ADDR_ATTR_MAX, nla, nl802154_dev_addr_policy, NULL))
                return -EINVAL;
 
-       if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] ||
-           !attrs[NL802154_DEV_ADDR_ATTR_MODE] ||
-           !(attrs[NL802154_DEV_ADDR_ATTR_SHORT] ||
-             attrs[NL802154_DEV_ADDR_ATTR_EXTENDED]))
+       if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] || !attrs[NL802154_DEV_ADDR_ATTR_MODE])
                return -EINVAL;
 
        addr->pan_id = nla_get_le16(attrs[NL802154_DEV_ADDR_ATTR_PAN_ID]);
        addr->mode = nla_get_u32(attrs[NL802154_DEV_ADDR_ATTR_MODE]);
        switch (addr->mode) {
        case NL802154_DEV_ADDR_SHORT:
+               if (!attrs[NL802154_DEV_ADDR_ATTR_SHORT])
+                       return -EINVAL;
                addr->short_addr = nla_get_le16(attrs[NL802154_DEV_ADDR_ATTR_SHORT]);
                break;
        case NL802154_DEV_ADDR_EXTENDED:
+               if (!attrs[NL802154_DEV_ADDR_ATTR_EXTENDED])
+                       return -EINVAL;
                addr->extended_addr = nla_get_le64(attrs[NL802154_DEV_ADDR_ATTR_EXTENDED]);
                break;
        default:
index bc2f6ca..816d8aa 100644 (file)
@@ -886,7 +886,7 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
 
 
 /*
- *  Copy BOOTP-supplied string if not already set.
+ *  Copy BOOTP-supplied string
  */
 static int __init ic_bootp_string(char *dest, char *src, int len, int max)
 {
@@ -935,12 +935,15 @@ static void __init ic_do_bootp_ext(u8 *ext)
                }
                break;
        case 12:        /* Host name */
-               ic_bootp_string(utsname()->nodename, ext+1, *ext,
-                               __NEW_UTS_LEN);
-               ic_host_name_set = 1;
+               if (!ic_host_name_set) {
+                       ic_bootp_string(utsname()->nodename, ext+1, *ext,
+                                       __NEW_UTS_LEN);
+                       ic_host_name_set = 1;
+               }
                break;
        case 15:        /* Domain name (DNS) */
-               ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain));
+               if (!ic_domain[0])
+                       ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain));
                break;
        case 17:        /* Root path */
                if (!root_server_path[0])
index a22822b..d417e51 100644 (file)
@@ -3673,11 +3673,11 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
        if (nh) {
                if (rt->fib6_src.plen) {
                        NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
-                       goto out;
+                       goto out_free;
                }
                if (!nexthop_get(nh)) {
                        NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
-                       goto out;
+                       goto out_free;
                }
                rt->nh = nh;
                fib6_nh = nexthop_fib6_nh(rt->nh);
@@ -3714,6 +3714,10 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
 out:
        fib6_info_release(rt);
        return ERR_PTR(err);
+out_free:
+       ip_fib_metrics_put(rt->fib6_metrics);
+       kfree(rt);
+       return ERR_PTR(err);
 }
 
 int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
index aa98294..f7c8110 100644 (file)
@@ -271,6 +271,9 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
        if (ipip6_tunnel_create(dev) < 0)
                goto failed_free;
 
+       if (!parms->name[0])
+               strcpy(parms->name, dev->name);
+
        return nt;
 
 failed_free:
index 6201965..1c572c8 100644 (file)
@@ -1066,6 +1066,11 @@ out_error:
                goto partial_message;
        }
 
+       if (skb_has_frag_list(head)) {
+               kfree_skb_list(skb_shinfo(head)->frag_list);
+               skb_shinfo(head)->frag_list = NULL;
+       }
+
        if (head != kcm->seq_skb)
                kfree_skb(head);
 
index 2bc1995..5edc686 100644 (file)
@@ -947,6 +947,10 @@ static void __mptcp_update_wmem(struct sock *sk)
 {
        struct mptcp_sock *msk = mptcp_sk(sk);
 
+#ifdef CONFIG_LOCKDEP
+       WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock));
+#endif
+
        if (!msk->wmem_reserved)
                return;
 
@@ -1085,10 +1089,20 @@ out:
 
 static void __mptcp_clean_una_wakeup(struct sock *sk)
 {
+#ifdef CONFIG_LOCKDEP
+       WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock));
+#endif
        __mptcp_clean_una(sk);
        mptcp_write_space(sk);
 }
 
+static void mptcp_clean_una_wakeup(struct sock *sk)
+{
+       mptcp_data_lock(sk);
+       __mptcp_clean_una_wakeup(sk);
+       mptcp_data_unlock(sk);
+}
+
 static void mptcp_enter_memory_pressure(struct sock *sk)
 {
        struct mptcp_subflow_context *subflow;
@@ -2299,7 +2313,7 @@ static void __mptcp_retrans(struct sock *sk)
        struct sock *ssk;
        int ret;
 
-       __mptcp_clean_una_wakeup(sk);
+       mptcp_clean_una_wakeup(sk);
        dfrag = mptcp_rtx_head(sk);
        if (!dfrag) {
                if (mptcp_data_fin_enabled(msk)) {
index bde6be7..ef3d037 100644 (file)
@@ -630,21 +630,20 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 
        /* if the sk is MP_CAPABLE, we try to fetch the client key */
        if (subflow_req->mp_capable) {
-               if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
-                       /* here we can receive and accept an in-window,
-                        * out-of-order pkt, which will not carry the MP_CAPABLE
-                        * opt even on mptcp enabled paths
-                        */
-                       goto create_msk;
-               }
-
+               /* we can receive and accept an in-window, out-of-order pkt,
+                * which may not carry the MP_CAPABLE opt even on mptcp enabled
+                * paths: always try to extract the peer key, and fallback
+                * for packets missing it.
+                * Even OoO DSS packets coming legitly after dropped or
+                * reordered MPC will cause fallback, but we don't have other
+                * options.
+                */
                mptcp_get_options(skb, &mp_opt);
                if (!mp_opt.mp_capable) {
                        fallback = true;
                        goto create_child;
                }
 
-create_msk:
                new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
                if (!new_msk)
                        fallback = true;
@@ -1012,21 +1011,11 @@ static bool subflow_check_data_avail(struct sock *ssk)
 
                status = get_mapping_status(ssk, msk);
                trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
-               if (status == MAPPING_INVALID) {
-                       ssk->sk_err = EBADMSG;
-                       goto fatal;
-               }
-               if (status == MAPPING_DUMMY) {
-                       __mptcp_do_fallback(msk);
-                       skb = skb_peek(&ssk->sk_receive_queue);
-                       subflow->map_valid = 1;
-                       subflow->map_seq = READ_ONCE(msk->ack_seq);
-                       subflow->map_data_len = skb->len;
-                       subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
-                                                  subflow->ssn_offset;
-                       subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
-                       return true;
-               }
+               if (unlikely(status == MAPPING_INVALID))
+                       goto fallback;
+
+               if (unlikely(status == MAPPING_DUMMY))
+                       goto fallback;
 
                if (status != MAPPING_OK)
                        goto no_data;
@@ -1039,10 +1028,8 @@ static bool subflow_check_data_avail(struct sock *ssk)
                 * MP_CAPABLE-based mapping
                 */
                if (unlikely(!READ_ONCE(msk->can_ack))) {
-                       if (!subflow->mpc_map) {
-                               ssk->sk_err = EBADMSG;
-                               goto fatal;
-                       }
+                       if (!subflow->mpc_map)
+                               goto fallback;
                        WRITE_ONCE(msk->remote_key, subflow->remote_key);
                        WRITE_ONCE(msk->ack_seq, subflow->map_seq);
                        WRITE_ONCE(msk->can_ack, true);
@@ -1070,17 +1057,31 @@ static bool subflow_check_data_avail(struct sock *ssk)
 no_data:
        subflow_sched_work_if_closed(msk, ssk);
        return false;
-fatal:
-       /* fatal protocol error, close the socket */
-       /* This barrier is coupled with smp_rmb() in tcp_poll() */
-       smp_wmb();
-       ssk->sk_error_report(ssk);
-       tcp_set_state(ssk, TCP_CLOSE);
-       subflow->reset_transient = 0;
-       subflow->reset_reason = MPTCP_RST_EMPTCP;
-       tcp_send_active_reset(ssk, GFP_ATOMIC);
-       subflow->data_avail = 0;
-       return false;
+
+fallback:
+       /* RFC 8684 section 3.7. */
+       if (subflow->mp_join || subflow->fully_established) {
+               /* fatal protocol error, close the socket.
+                * subflow_error_report() will introduce the appropriate barriers
+                */
+               ssk->sk_err = EBADMSG;
+               ssk->sk_error_report(ssk);
+               tcp_set_state(ssk, TCP_CLOSE);
+               subflow->reset_transient = 0;
+               subflow->reset_reason = MPTCP_RST_EMPTCP;
+               tcp_send_active_reset(ssk, GFP_ATOMIC);
+               subflow->data_avail = 0;
+               return false;
+       }
+
+       __mptcp_do_fallback(msk);
+       skb = skb_peek(&ssk->sk_receive_queue);
+       subflow->map_valid = 1;
+       subflow->map_seq = READ_ONCE(msk->ack_seq);
+       subflow->map_data_len = skb->len;
+       subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
+       subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
+       return true;
 }
 
 bool mptcp_subflow_data_available(struct sock *sk)
index d45dbcb..c250970 100644 (file)
@@ -1367,7 +1367,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
        ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
        svc->port = u->port;
        svc->fwmark = u->fwmark;
-       svc->flags = u->flags;
+       svc->flags = u->flags & ~IP_VS_SVC_F_HASHED;
        svc->timeout = u->timeout * HZ;
        svc->netmask = u->netmask;
        svc->ipvs = ipvs;
index 89e5bac..dc9ca12 100644 (file)
@@ -664,7 +664,7 @@ int nf_conntrack_proto_init(void)
 
 #if IS_ENABLED(CONFIG_IPV6)
 cleanup_sockopt:
-       nf_unregister_sockopt(&so_getorigdst6);
+       nf_unregister_sockopt(&so_getorigdst);
 #endif
        return ret;
 }
index d63d2d8..72bc759 100644 (file)
@@ -736,7 +736,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
                goto nla_put_failure;
 
        if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
-           nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
+           nla_put_be32(skb, NFTA_TABLE_FLAGS,
+                        htonl(table->flags & NFT_TABLE_F_MASK)) ||
            nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) ||
            nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle),
                         NFTA_TABLE_PAD))
@@ -947,20 +948,22 @@ err_register_hooks:
 
 static void nf_tables_table_disable(struct net *net, struct nft_table *table)
 {
+       table->flags &= ~NFT_TABLE_F_DORMANT;
        nft_table_disable(net, table, 0);
+       table->flags |= NFT_TABLE_F_DORMANT;
 }
 
-enum {
-       NFT_TABLE_STATE_UNCHANGED       = 0,
-       NFT_TABLE_STATE_DORMANT,
-       NFT_TABLE_STATE_WAKEUP
-};
+#define __NFT_TABLE_F_INTERNAL         (NFT_TABLE_F_MASK + 1)
+#define __NFT_TABLE_F_WAS_DORMANT      (__NFT_TABLE_F_INTERNAL << 0)
+#define __NFT_TABLE_F_WAS_AWAKEN       (__NFT_TABLE_F_INTERNAL << 1)
+#define __NFT_TABLE_F_UPDATE           (__NFT_TABLE_F_WAS_DORMANT | \
+                                        __NFT_TABLE_F_WAS_AWAKEN)
 
 static int nf_tables_updtable(struct nft_ctx *ctx)
 {
        struct nft_trans *trans;
        u32 flags;
-       int ret = 0;
+       int ret;
 
        if (!ctx->nla[NFTA_TABLE_FLAGS])
                return 0;
@@ -985,21 +988,27 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
 
        if ((flags & NFT_TABLE_F_DORMANT) &&
            !(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
-               nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT;
+               ctx->table->flags |= NFT_TABLE_F_DORMANT;
+               if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE))
+                       ctx->table->flags |= __NFT_TABLE_F_WAS_AWAKEN;
        } else if (!(flags & NFT_TABLE_F_DORMANT) &&
                   ctx->table->flags & NFT_TABLE_F_DORMANT) {
-               ret = nf_tables_table_enable(ctx->net, ctx->table);
-               if (ret >= 0)
-                       nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP;
+               ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
+               if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE)) {
+                       ret = nf_tables_table_enable(ctx->net, ctx->table);
+                       if (ret < 0)
+                               goto err_register_hooks;
+
+                       ctx->table->flags |= __NFT_TABLE_F_WAS_DORMANT;
+               }
        }
-       if (ret < 0)
-               goto err;
 
-       nft_trans_table_flags(trans) = flags;
        nft_trans_table_update(trans) = true;
        nft_trans_commit_list_add_tail(ctx->net, trans);
+
        return 0;
-err:
+
+err_register_hooks:
        nft_trans_destroy(trans);
        return ret;
 }
@@ -1905,7 +1914,7 @@ static int nft_chain_parse_netdev(struct net *net,
 static int nft_chain_parse_hook(struct net *net,
                                const struct nlattr * const nla[],
                                struct nft_chain_hook *hook, u8 family,
-                               bool autoload)
+                               struct netlink_ext_ack *extack, bool autoload)
 {
        struct nftables_pernet *nft_net = nft_pernet(net);
        struct nlattr *ha[NFTA_HOOK_MAX + 1];
@@ -1935,8 +1944,10 @@ static int nft_chain_parse_hook(struct net *net,
        if (nla[NFTA_CHAIN_TYPE]) {
                type = nf_tables_chain_type_lookup(net, nla[NFTA_CHAIN_TYPE],
                                                   family, autoload);
-               if (IS_ERR(type))
+               if (IS_ERR(type)) {
+                       NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]);
                        return PTR_ERR(type);
+               }
        }
        if (hook->num >= NFT_MAX_HOOKS || !(type->hook_mask & (1 << hook->num)))
                return -EOPNOTSUPP;
@@ -1945,8 +1956,11 @@ static int nft_chain_parse_hook(struct net *net,
            hook->priority <= NF_IP_PRI_CONNTRACK)
                return -EOPNOTSUPP;
 
-       if (!try_module_get(type->owner))
+       if (!try_module_get(type->owner)) {
+               if (nla[NFTA_CHAIN_TYPE])
+                       NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]);
                return -ENOENT;
+       }
 
        hook->type = type;
 
@@ -2057,7 +2071,8 @@ static int nft_chain_add(struct nft_table *table, struct nft_chain *chain)
 static u64 chain_id;
 
 static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
-                             u8 policy, u32 flags)
+                             u8 policy, u32 flags,
+                             struct netlink_ext_ack *extack)
 {
        const struct nlattr * const *nla = ctx->nla;
        struct nft_table *table = ctx->table;
@@ -2079,7 +2094,8 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
                if (flags & NFT_CHAIN_BINDING)
                        return -EOPNOTSUPP;
 
-               err = nft_chain_parse_hook(net, nla, &hook, family, true);
+               err = nft_chain_parse_hook(net, nla, &hook, family, extack,
+                                          true);
                if (err < 0)
                        return err;
 
@@ -2234,7 +2250,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
                        return -EEXIST;
                }
                err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family,
-                                          false);
+                                          extack, false);
                if (err < 0)
                        return err;
 
@@ -2447,7 +2463,7 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
                                          extack);
        }
 
-       return nf_tables_addchain(&ctx, family, genmask, policy, flags);
+       return nf_tables_addchain(&ctx, family, genmask, policy, flags, extack);
 }
 
 static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
@@ -3328,8 +3344,10 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
                        if (n == NFT_RULE_MAXEXPRS)
                                goto err1;
                        err = nf_tables_expr_parse(&ctx, tmp, &expr_info[n]);
-                       if (err < 0)
+                       if (err < 0) {
+                               NL_SET_BAD_ATTR(extack, tmp);
                                goto err1;
+                       }
                        size += expr_info[n].ops->size;
                        n++;
                }
@@ -8547,10 +8565,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
                switch (trans->msg_type) {
                case NFT_MSG_NEWTABLE:
                        if (nft_trans_table_update(trans)) {
-                               if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT)
+                               if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) {
+                                       nft_trans_destroy(trans);
+                                       break;
+                               }
+                               if (trans->ctx.table->flags & NFT_TABLE_F_DORMANT)
                                        nf_tables_table_disable(net, trans->ctx.table);
 
-                               trans->ctx.table->flags = nft_trans_table_flags(trans);
+                               trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE;
                        } else {
                                nft_clear(net, trans->ctx.table);
                        }
@@ -8768,9 +8790,17 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
                switch (trans->msg_type) {
                case NFT_MSG_NEWTABLE:
                        if (nft_trans_table_update(trans)) {
-                               if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP)
+                               if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) {
+                                       nft_trans_destroy(trans);
+                                       break;
+                               }
+                               if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_DORMANT) {
                                        nf_tables_table_disable(net, trans->ctx.table);
-
+                                       trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
+                               } else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) {
+                                       trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT;
+                               }
+                               trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE;
                                nft_trans_destroy(trans);
                        } else {
                                list_del_rcu(&trans->ctx.table->list);
index 322ac5d..752b10c 100644 (file)
@@ -380,10 +380,14 @@ static int
 nfnl_cthelper_update(const struct nlattr * const tb[],
                     struct nf_conntrack_helper *helper)
 {
+       u32 size;
        int ret;
 
-       if (tb[NFCTH_PRIV_DATA_LEN])
-               return -EBUSY;
+       if (tb[NFCTH_PRIV_DATA_LEN]) {
+               size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
+               if (size != helper->data_len)
+                       return -EBUSY;
+       }
 
        if (tb[NFCTH_POLICY]) {
                ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]);
index 0592a94..337e22d 100644 (file)
@@ -1217,7 +1217,7 @@ static void nft_ct_expect_obj_eval(struct nft_object *obj,
        struct nf_conn *ct;
 
        ct = nf_ct_get(pkt->skb, &ctinfo);
-       if (!ct || ctinfo == IP_CT_UNTRACKED) {
+       if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) {
                regs->verdict.code = NFT_BREAK;
                return;
        }
index 53dbe73..6cfd30f 100644 (file)
@@ -110,6 +110,7 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
        if (!llcp_sock->service_name) {
                nfc_llcp_local_put(llcp_sock->local);
                llcp_sock->local = NULL;
+               llcp_sock->dev = NULL;
                ret = -ENOMEM;
                goto put_dev;
        }
@@ -119,6 +120,7 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
                llcp_sock->local = NULL;
                kfree(llcp_sock->service_name);
                llcp_sock->service_name = NULL;
+               llcp_sock->dev = NULL;
                ret = -EADDRINUSE;
                goto put_dev;
        }
index ec7a1c4..18edd9a 100644 (file)
@@ -984,7 +984,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
         */
        cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force);
        if (!cached) {
-               if (!commit && tcf_ct_flow_table_lookup(p, skb, family)) {
+               if (tcf_ct_flow_table_lookup(p, skb, family)) {
                        skip_add = true;
                        goto do_nat;
                }
@@ -1022,10 +1022,11 @@ do_nat:
                 * even if the connection is already confirmed.
                 */
                nf_conntrack_confirm(skb);
-       } else if (!skip_add) {
-               tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);
        }
 
+       if (!skip_add)
+               tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);
+
 out_push:
        skb_push_rcsum(skb, nh_ofs);
 
@@ -1202,9 +1203,6 @@ static int tcf_ct_fill_params(struct net *net,
                                   sizeof(p->zone));
        }
 
-       if (p->zone == NF_CT_DEFAULT_ZONE_ID)
-               return 0;
-
        nf_ct_zone_init(&zone, p->zone, NF_CT_DEFAULT_ZONE_DIR, 0);
        tmpl = nf_ct_tmpl_alloc(net, &zone, GFP_KERNEL);
        if (!tmpl) {
index 081c11d..8827987 100644 (file)
@@ -1488,7 +1488,8 @@ static void htb_parent_to_leaf_offload(struct Qdisc *sch,
        struct Qdisc *old_q;
 
        /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
-       qdisc_refcount_inc(new_q);
+       if (new_q)
+               qdisc_refcount_inc(new_q);
        old_q = htb_graft_helper(dev_queue, new_q);
        WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
 }
@@ -1675,10 +1676,9 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg,
                                          cl->parent->common.classid,
                                          NULL);
                if (q->offload) {
-                       if (new_q) {
+                       if (new_q)
                                htb_set_lockdep_class_child(new_q);
-                               htb_parent_to_leaf_offload(sch, dev_queue, new_q);
-                       }
+                       htb_parent_to_leaf_offload(sch, dev_queue, new_q);
                }
        }
 
index 76a6f8c..bd9f156 100644 (file)
@@ -50,6 +50,7 @@ static void tls_device_gc_task(struct work_struct *work);
 static DECLARE_WORK(tls_device_gc_work, tls_device_gc_task);
 static LIST_HEAD(tls_device_gc_list);
 static LIST_HEAD(tls_device_list);
+static LIST_HEAD(tls_device_down_list);
 static DEFINE_SPINLOCK(tls_device_lock);
 
 static void tls_device_free_ctx(struct tls_context *ctx)
@@ -680,15 +681,13 @@ static void tls_device_resync_rx(struct tls_context *tls_ctx,
        struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx);
        struct net_device *netdev;
 
-       if (WARN_ON(test_and_set_bit(TLS_RX_SYNC_RUNNING, &tls_ctx->flags)))
-               return;
-
        trace_tls_device_rx_resync_send(sk, seq, rcd_sn, rx_ctx->resync_type);
+       rcu_read_lock();
        netdev = READ_ONCE(tls_ctx->netdev);
        if (netdev)
                netdev->tlsdev_ops->tls_dev_resync(netdev, sk, seq, rcd_sn,
                                                   TLS_OFFLOAD_CTX_DIR_RX);
-       clear_bit_unlock(TLS_RX_SYNC_RUNNING, &tls_ctx->flags);
+       rcu_read_unlock();
        TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICERESYNC);
 }
 
@@ -761,6 +760,8 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
 
        if (tls_ctx->rx_conf != TLS_HW)
                return;
+       if (unlikely(test_bit(TLS_RX_DEV_DEGRADED, &tls_ctx->flags)))
+               return;
 
        prot = &tls_ctx->prot_info;
        rx_ctx = tls_offload_ctx_rx(tls_ctx);
@@ -963,6 +964,17 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
 
        ctx->sw.decrypted |= is_decrypted;
 
+       if (unlikely(test_bit(TLS_RX_DEV_DEGRADED, &tls_ctx->flags))) {
+               if (likely(is_encrypted || is_decrypted))
+                       return 0;
+
+               /* After tls_device_down disables the offload, the next SKB will
+                * likely have initial fragments decrypted, and final ones not
+                * decrypted. We need to reencrypt that single SKB.
+                */
+               return tls_device_reencrypt(sk, skb);
+       }
+
        /* Return immediately if the record is either entirely plaintext or
         * entirely ciphertext. Otherwise handle reencrypt partially decrypted
         * record.
@@ -1292,6 +1304,26 @@ static int tls_device_down(struct net_device *netdev)
        spin_unlock_irqrestore(&tls_device_lock, flags);
 
        list_for_each_entry_safe(ctx, tmp, &list, list) {
+               /* Stop offloaded TX and switch to the fallback.
+                * tls_is_sk_tx_device_offloaded will return false.
+                */
+               WRITE_ONCE(ctx->sk->sk_validate_xmit_skb, tls_validate_xmit_skb_sw);
+
+               /* Stop the RX and TX resync.
+                * tls_dev_resync must not be called after tls_dev_del.
+                */
+               WRITE_ONCE(ctx->netdev, NULL);
+
+               /* Start skipping the RX resync logic completely. */
+               set_bit(TLS_RX_DEV_DEGRADED, &ctx->flags);
+
+               /* Sync with inflight packets. After this point:
+                * TX: no non-encrypted packets will be passed to the driver.
+                * RX: resync requests from the driver will be ignored.
+                */
+               synchronize_net();
+
+               /* Release the offload context on the driver side. */
                if (ctx->tx_conf == TLS_HW)
                        netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
                                                        TLS_OFFLOAD_CTX_DIR_TX);
@@ -1299,15 +1331,21 @@ static int tls_device_down(struct net_device *netdev)
                    !test_bit(TLS_RX_DEV_CLOSED, &ctx->flags))
                        netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
                                                        TLS_OFFLOAD_CTX_DIR_RX);
-               WRITE_ONCE(ctx->netdev, NULL);
-               smp_mb__before_atomic(); /* pairs with test_and_set_bit() */
-               while (test_bit(TLS_RX_SYNC_RUNNING, &ctx->flags))
-                       usleep_range(10, 200);
+
                dev_put(netdev);
-               list_del_init(&ctx->list);
 
-               if (refcount_dec_and_test(&ctx->refcount))
-                       tls_device_free_ctx(ctx);
+               /* Move the context to a separate list for two reasons:
+                * 1. When the context is deallocated, list_del is called.
+                * 2. It's no longer an offloaded context, so we don't want to
+                *    run offload-specific code on this context.
+                */
+               spin_lock_irqsave(&tls_device_lock, flags);
+               list_move_tail(&ctx->list, &tls_device_down_list);
+               spin_unlock_irqrestore(&tls_device_lock, flags);
+
+               /* Device contexts for RX and TX will be freed in on sk_destruct
+                * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW.
+                */
        }
 
        up_write(&device_offload_lock);
index cacf040..e40bedd 100644 (file)
@@ -431,6 +431,13 @@ struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
 }
 EXPORT_SYMBOL_GPL(tls_validate_xmit_skb);
 
+struct sk_buff *tls_validate_xmit_skb_sw(struct sock *sk,
+                                        struct net_device *dev,
+                                        struct sk_buff *skb)
+{
+       return tls_sw_fallback(sk, skb);
+}
+
 struct sk_buff *tls_encrypt_skb(struct sk_buff *skb)
 {
        return tls_sw_fallback(skb->sk, skb);
index 47b7c53..fde56ff 100644 (file)
@@ -636,6 +636,7 @@ struct tls_context *tls_ctx_create(struct sock *sk)
        mutex_init(&ctx->tx_lock);
        rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
        ctx->sk_proto = READ_ONCE(sk->sk_prot);
+       ctx->sk = sk;
        return ctx;
 }
 
index 44d6566..1816899 100644 (file)
@@ -536,7 +536,7 @@ static int x25_create(struct net *net, struct socket *sock, int protocol,
        if (protocol)
                goto out;
 
-       rc = -ENOBUFS;
+       rc = -ENOMEM;
        if ((sk = x25_alloc_socket(net, kern)) == NULL)
                goto out;
 
index dd87cea..a7883e4 100644 (file)
@@ -59,7 +59,7 @@ quiet_cmd_ld_ko_o = LD [M]  $@
 quiet_cmd_btf_ko = BTF [M] $@
       cmd_btf_ko =                                                     \
        if [ -f vmlinux ]; then                                         \
-               LLVM_OBJCOPY=$(OBJCOPY) $(PAHOLE) -J --btf_base vmlinux $@; \
+               LLVM_OBJCOPY="$(OBJCOPY)" $(PAHOLE) -J --btf_base vmlinux $@; \
        else                                                            \
                printf "Skipping BTF generation for %s due to unavailability of vmlinux\n" $@ 1>&2; \
        fi;
index f4de4c9..0e0f646 100755 (executable)
@@ -240,7 +240,7 @@ gen_btf()
        fi
 
        info "BTF" ${2}
-       LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${extra_paholeopt} ${1}
+       LLVM_OBJCOPY="${OBJCOPY}" ${PAHOLE} -J ${extra_paholeopt} ${1}
 
        # Create ${2} which contains just .BTF section but no symbols. Add
        # SHF_ALLOC because .BTF will be part of the vmlinux image. --strip-all
index 3c4cb72..9ca5f1b 100755 (executable)
@@ -501,6 +501,7 @@ do_transfer()
        local stat_ackrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
        local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent")
        local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+       local stat_ooo_now=$(get_mib_counter "${listener_ns}" "TcpExtTCPOFOQueue")
 
        expect_synrx=$((stat_synrx_last_l))
        expect_ackrx=$((stat_ackrx_last_l))
@@ -518,10 +519,14 @@ do_transfer()
                        "${stat_synrx_now_l}" "${expect_synrx}" 1>&2
                retc=1
        fi
-       if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then
-               printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \
-                       "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2
-               rets=1
+       if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} -a ${stat_ooo_now} -eq 0 ]; then
+               if [ ${stat_ooo_now} -eq 0 ]; then
+                       printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \
+                               "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2
+                       rets=1
+               else
+                       printf "[ Note ] fallback due to TCP OoO"
+               fi
        fi
 
        if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
index 7ed7cd9..ebc4ee0 100755 (executable)
@@ -363,6 +363,7 @@ ip1 -6 rule add table main suppress_prefixlength 0
 ip1 -4 route add default dev wg0 table 51820
 ip1 -4 rule add not fwmark 51820 table 51820
 ip1 -4 rule add table main suppress_prefixlength 0
+n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/vethc/rp_filter'
 # Flood the pings instead of sending just one, to trigger routing table reference counting bugs.
 n1 ping -W 1 -c 100 -f 192.168.99.7
 n1 ping -W 1 -c 100 -f abab::1111
index 4eecb43..74db83a 100644 (file)
@@ -19,7 +19,6 @@ CONFIG_NETFILTER_XTABLES=y
 CONFIG_NETFILTER_XT_NAT=y
 CONFIG_NETFILTER_XT_MATCH_LENGTH=y
 CONFIG_NETFILTER_XT_MARK=y
-CONFIG_NF_CONNTRACK_IPV4=y
 CONFIG_NF_NAT_IPV4=y
 CONFIG_IP_NF_IPTABLES=y
 CONFIG_IP_NF_FILTER=y