Merge tag 'mac80211-next-for-net-next-2021-01-27' of git://git.kernel.org/pub/scm...
author Jakub Kicinski <kuba@kernel.org>
Thu, 28 Jan 2021 03:01:06 +0000 (19:01 -0800)
committer Jakub Kicinski <kuba@kernel.org>
Thu, 28 Jan 2021 03:01:06 +0000 (19:01 -0800)
Johannes Berg says:

====================
More updates:
 * many minstrel improvements, including removal of the old
   minstrel in favour of minstrel_ht
 * speed improvements on FQ
 * support for RX decapsulation (header conversion) offload
 * RTNL reduction: limit RTNL usage in the wireless stack
   mostly to where really needed (regulatory not yet) to
   reduce contention on it

* tag 'mac80211-next-for-net-next-2021-01-27' of git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next: (24 commits)
  mac80211: minstrel_ht: fix regression in the max_prob_rate fix
  virt_wifi: fix deadlock on RTNL
  cfg80211: avoid holding the RTNL when calling the driver
  cfg80211: change netdev registration/unregistration semantics
  mac80211: minstrel_ht: fix rounding error in throughput calculation
  mac80211: minstrel_ht: increase stats update interval
  mac80211: minstrel_ht: fix max probability rate selection
  mac80211: minstrel_ht: improve sample rate selection
  mac80211: minstrel_ht: improve ampdu length estimation
  mac80211: minstrel_ht: remove old ewma based rate average code
  mac80211: remove legacy minstrel rate control
  mac80211: minstrel_ht: add support for OFDM rates on non-HT clients
  mac80211: minstrel_ht: clean up CCK code
  mac80211: introduce aql_enable node in debugfs
  cfg80211: Add phyrate conversion support for extended MCS in 60GHz band
  cfg80211: add VHT rate entries for MCS-10 and MCS-11
  mac80211: reduce peer HE MCS/NSS to own capabilities
  mac80211: remove NSS number of 160MHz if not support 160MHz for HE
  mac80211_hwsim: add 6GHz channels
  mac80211: add LDPC encoding to ieee80211_parse_tx_radiotap
  ...
====================

Link: https://lore.kernel.org/r/20210127210915.135550-1-johannes@sipsolutions.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
160 files changed:
Documentation/devicetree/bindings/net/dsa/mt7530.txt
Documentation/devicetree/bindings/net/qcom,ipa.yaml
Documentation/devicetree/bindings/net/renesas,etheravb.yaml
Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst
Documentation/networking/devlink/devlink-resource.rst
Documentation/networking/ip-sysctl.rst
MAINTAINERS
Makefile
arch/arm64/boot/dts/qcom/sc7180.dtsi
arch/arm64/boot/dts/qcom/sdm845.dtsi
drivers/net/Kconfig
drivers/net/can/dev/dev.c
drivers/net/can/dev/length.c
drivers/net/can/flexcan.c
drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
drivers/net/can/usb/mcba_usb.c
drivers/net/dsa/hirschmann/hellcreek.c
drivers/net/dsa/hirschmann/hellcreek.h
drivers/net/dsa/mt7530.c
drivers/net/dsa/mt7530.h
drivers/net/dsa/mv88e6xxx/Kconfig
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/mv88e6xxx/global1.h
drivers/net/dsa/mv88e6xxx/global1_vtu.c
drivers/net/ethernet/Kconfig
drivers/net/ethernet/Makefile
drivers/net/ethernet/aurora/Kconfig [deleted file]
drivers/net/ethernet/aurora/Makefile [deleted file]
drivers/net/ethernet/aurora/nb8800.c [deleted file]
drivers/net/ethernet/aurora/nb8800.h [deleted file]
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/broadcom/tg3.h
drivers/net/ethernet/cadence/macb_main.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
drivers/net/ethernet/freescale/ucc_geth.c
drivers/net/ethernet/freescale/ucc_geth.h
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
drivers/net/ethernet/marvell/Kconfig
drivers/net/ethernet/marvell/octeontx2/af/mbox.h
drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.h
drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/mediatek/mtk_eth_soc.h
drivers/net/ethernet/mellanox/mlx5/core/Makefile
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en/params.h
drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
drivers/net/ethernet/mellanox/mlx5/core/en/qos.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en/qos.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
drivers/net/ethernet/mellanox/mlx5/core/qos.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/qos.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/core.c
drivers/net/ethernet/mellanox/mlxsw/core.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/ethernet/rocker/rocker_ofdpa.c
drivers/net/ethernet/sfc/efx_channels.c
drivers/net/ipa/gsi.c
drivers/net/ipa/ipa.h
drivers/net/ipa/ipa_main.c
drivers/net/mhi_net.c
drivers/net/phy/realtek.c
drivers/net/usb/usbnet.c
drivers/nfc/fdp/i2c.c
drivers/nfc/trf7970a.c
drivers/soc/fsl/qe/qe_common.c
include/linux/can/dev.h
include/linux/ipv6.h
include/linux/mlx5/mlx5_ifc.h
include/linux/netdev_features.h
include/linux/netdevice.h
include/linux/skbuff.h
include/linux/tcp.h
include/net/devlink.h
include/net/inet_connection_sock.h
include/net/ip6_route.h
include/net/pkt_cls.h
include/net/sch_generic.h
include/soc/fsl/qe/qe.h
include/soc/fsl/qe/ucc_fast.h
include/uapi/linux/if_link.h
include/uapi/linux/ipv6.h
include/uapi/linux/pkt_sched.h
include/uapi/linux/sysctl.h
include/uapi/linux/tcp.h
net/9p/Kconfig
net/Makefile
net/batman-adv/Kconfig
net/bluetooth/Kconfig
net/bpfilter/Kconfig
net/bridge/Makefile
net/bridge/br_multicast.c
net/bridge/br_multicast_eht.c [new file with mode: 0644]
net/bridge/br_netlink.c
net/bridge/br_private.h
net/bridge/br_private_mcast_eht.h [new file with mode: 0644]
net/bridge/br_sysfs_if.c
net/can/Kconfig
net/can/gw.c
net/core/dev.c
net/core/devlink.c
net/core/pktgen.c
net/core/rtnetlink.c
net/core/skbuff.c
net/core/sysctl_net_core.c
net/dcb/Makefile
net/dns_resolver/Kconfig
net/ethtool/common.c
net/ife/Kconfig
net/ipv4/tcp.c
net/ipv4/tcp_input.c
net/ipv4/udp_offload.c
net/ipv6/addrconf.c
net/ipv6/ndisc.c
net/ipv6/route.c
net/l3mdev/Makefile
net/llc/Kconfig
net/mptcp/pm_netlink.c
net/mptcp/protocol.c
net/mptcp/protocol.h
net/mptcp/subflow.c
net/netfilter/Kconfig
net/netfilter/ipvs/Kconfig
net/nfc/Kconfig
net/psample/Kconfig
net/sched/sch_api.c
net/sched/sch_atm.c
net/sched/sch_cbq.c
net/sched/sch_drr.c
net/sched/sch_dsmark.c
net/sched/sch_hfsc.c
net/sched/sch_htb.c
net/sched/sch_qfq.c
net/sched/sch_sfb.c
net/switchdev/Makefile
net/tipc/msg.c
tools/include/uapi/linux/pkt_sched.h
tools/testing/selftests/drivers/net/mlxsw/port_scale.sh [new file with mode: 0644]
tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh [new file with mode: 0644]
tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh [new file with mode: 0644]
tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
tools/testing/selftests/net/Makefile
tools/testing/selftests/net/mptcp/mptcp_join.sh
tools/testing/selftests/net/mptcp/settings
tools/testing/selftests/net/unicast_extensions.sh [new file with mode: 0755]

index 560369e..de04626 100644 (file)
@@ -76,6 +76,12 @@ phy-mode must be set, see also example 2 below!
  * mt7621: phy-mode = "rgmii-txid";
  * mt7623: phy-mode = "rgmii";
 
+Optional properties:
+
+- gpio-controller: Boolean; if defined, MT7530's LED controller will run in
+       GPIO mode.
+- #gpio-cells: Must be 2 if gpio-controller is defined.
+
 See Documentation/devicetree/bindings/net/dsa/dsa.txt for a list of additional
 required, optional properties and how the integrated switch subnodes must
 be specified.
index 8a2d126..8f86084 100644 (file)
@@ -113,13 +113,6 @@ properties:
       performing early IPA initialization, including loading and
       validating firwmare used by the GSI.
 
-  modem-remoteproc:
-    $ref: /schemas/types.yaml#/definitions/phandle
-    description:
-      This defines the phandle to the remoteproc node representing
-      the modem subsystem.  This is requied so the IPA driver can
-      receive and act on notifications of modem up/down events.
-
   memory-region:
     maxItems: 1
     description:
@@ -135,7 +128,6 @@ required:
   - interrupts
   - interconnects
   - qcom,smem-states
-  - modem-remoteproc
 
 oneOf:
   - required:
@@ -147,7 +139,7 @@ additionalProperties: false
 
 examples:
   - |
-        #include <dt-bindings/interrupt-controller/irq.h>
+        #include <dt-bindings/interrupt-controller/arm-gic.h>
         #include <dt-bindings/clock/qcom,rpmh.h>
         #include <dt-bindings/interconnect/qcom,sdm845.h>
 
@@ -168,7 +160,6 @@ examples:
                 compatible = "qcom,sdm845-ipa";
 
                 modem-init;
-                modem-remoteproc = <&mss_pil>;
 
                 iommus = <&apps_smmu 0x720 0x3>;
                 reg = <0x1e40000 0x7000>,
@@ -178,8 +169,8 @@ examples:
                             "ipa-shared",
                             "gsi";
 
-                interrupts-extended = <&intc 0 311 IRQ_TYPE_EDGE_RISING>,
-                                      <&intc 0 432 IRQ_TYPE_LEVEL_HIGH>,
+                interrupts-extended = <&intc GIC_SPI 311 IRQ_TYPE_EDGE_RISING>,
+                                      <&intc GIC_SPI 432 IRQ_TYPE_LEVEL_HIGH>,
                                       <&ipa_smp2p_in 0 IRQ_TYPE_EDGE_RISING>,
                                       <&ipa_smp2p_in 1 IRQ_TYPE_EDGE_RISING>;
                 interrupt-names = "ipa",
index de9dd57..91ba96d 100644 (file)
@@ -40,6 +40,7 @@ properties:
               - renesas,etheravb-r8a77980     # R-Car V3H
               - renesas,etheravb-r8a77990     # R-Car E3
               - renesas,etheravb-r8a77995     # R-Car D3
+              - renesas,etheravb-r8a779a0     # R-Car V3U
           - const: renesas,etheravb-rcar-gen3 # R-Car Gen3 and RZ/G2
 
   reg: true
@@ -170,6 +171,7 @@ allOf:
               - renesas,etheravb-r8a77965
               - renesas,etheravb-r8a77970
               - renesas,etheravb-r8a77980
+              - renesas,etheravb-r8a779a0
     then:
       required:
         - tx-internal-delay-ps
index 61e8504..dd5cd69 100644 (file)
@@ -217,3 +217,73 @@ For example::
         NPA_AF_ERR:
                NPA Error Interrupt Reg : 4096
                AQ Doorbell Error
+
+
+NIX Reporters
+-------------
+The NIX reporters are responsible for reporting and recovering the following group of errors:
+
+1. GENERAL events
+
+   - Receive mirror/multicast packet drop due to insufficient buffer.
+   - SMQ Flush operation.
+
+2. ERROR events
+
+   - Memory Fault due to WQE read/write from multicast/mirror buffer.
+   - Receive multicast/mirror replication list error.
+   - Receive packet on an unmapped PF.
+   - Fault due to NIX_AQ_INST_S read or NIX_AQ_RES_S write.
+   - AQ Doorbell Error.
+
+3. RAS events
+
+   - RAS Error Reporting for NIX Receive Multicast/Mirror Entry Structure.
+   - RAS Error Reporting for WQE/Packet Data read from Multicast/Mirror Buffer.
+   - RAS Error Reporting for NIX_AQ_INST_S/NIX_AQ_RES_S.
+
+4. RVU events
+
+   - Error due to unmapped slot.
+
+Sample Output::
+
+       ~# ./devlink health
+       pci/0002:01:00.0:
+         reporter hw_npa_intr
+           state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true
+         reporter hw_npa_gen
+           state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true
+         reporter hw_npa_err
+           state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true
+         reporter hw_npa_ras
+           state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true
+         reporter hw_nix_intr
+           state healthy error 1121 recover 1121 last_dump_date 2021-01-19 last_dump_time 05:42:26 grace_period 0 auto_recover true auto_dump true
+         reporter hw_nix_gen
+           state healthy error 949 recover 949 last_dump_date 2021-01-19 last_dump_time 05:42:43 grace_period 0 auto_recover true auto_dump true
+         reporter hw_nix_err
+           state healthy error 1147 recover 1147 last_dump_date 2021-01-19 last_dump_time 05:42:59 grace_period 0 auto_recover true auto_dump true
+         reporter hw_nix_ras
+           state healthy error 409 recover 409 last_dump_date 2021-01-19 last_dump_time 05:43:16 grace_period 0 auto_recover true auto_dump true
+
+Each reporter dumps the
+
+ - Error Type
+ - Error Register value
+ - Reason in words
+
+For example::
+
+       ~# devlink health dump show pci/0002:01:00.0 reporter hw_nix_intr
+        NIX_AF_RVU:
+               NIX RVU Interrupt Reg : 1
+               Unmap Slot Error
+       ~# devlink health dump show pci/0002:01:00.0 reporter hw_nix_gen
+        NIX_AF_GENERAL:
+               NIX General Interrupt Reg : 1
+               Rx multicast pkt drop
+       ~# devlink health dump show pci/0002:01:00.0 reporter hw_nix_err
+        NIX_AF_ERR:
+               NIX Error Interrupt Reg : 64
+               Rx on unmapped PF_FUNC
index 93e92d2..3d5ae51 100644 (file)
@@ -23,6 +23,20 @@ current size and related sub resources. To access a sub resource, you
 specify the path of the resource. For example ``/IPv4/fib`` is the id for
 the ``fib`` sub-resource under the ``IPv4`` resource.
 
+Generic Resources
+=================
+
+Generic resources are used to describe resources that can be shared by multiple
+device drivers and their description must be added to the following table:
+
+.. list-table:: List of Generic Resources
+   :widths: 10 90
+
+   * - Name
+     - Description
+   * - ``physical_ports``
+     - A limited capacity of physical ports that the switch ASIC can support
+
 example usage
 -------------
 
index dd2b12a..0e51ddd 100644 (file)
@@ -1871,6 +1871,16 @@ accept_ra_defrtr - BOOLEAN
                - enabled if accept_ra is enabled.
                - disabled if accept_ra is disabled.
 
+ra_defrtr_metric - UNSIGNED INTEGER
+       Route metric for default route learned in Router Advertisement. This value
+       will be assigned as metric for the default route learned via IPv6 Router
+       Advertisement. Takes effect only if accept_ra_defrtr is enabled.
+
+       Possible values:
+               1 to 0xFFFFFFFF
+
+               Default: IP6_RT_PRIO_USER i.e. 1024.
+
 accept_ra_from_local - BOOLEAN
        Accept RA with source-address that is found on local machine
        if the RA is otherwise proper and able to be accepted.
index 1df56a3..650deb9 100644 (file)
@@ -2787,6 +2787,14 @@ F:       arch/arm64/
 F:     tools/testing/selftests/arm64/
 X:     arch/arm64/boot/dts/
 
+ARROW SPEEDCHIPS XRS7000 SERIES ETHERNET SWITCH DRIVER
+M:     George McCollister <george.mccollister@gmail.com>
+L:     netdev@vger.kernel.org
+S:     Maintained
+F:     Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml
+F:     drivers/net/dsa/xrs700x/*
+F:     net/dsa/tag_xrs700x.c
+
 AS3645A LED FLASH CONTROLLER DRIVER
 M:     Sakari Ailus <sakari.ailus@iki.fi>
 L:     linux-leds@vger.kernel.org
index b0e4767..61357f7 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -649,7 +649,8 @@ ifeq ($(KBUILD_EXTMOD),)
 core-y         := init/ usr/
 drivers-y      := drivers/ sound/
 drivers-$(CONFIG_SAMPLES) += samples/
-drivers-y      += net/ virt/
+drivers-$(CONFIG_NET) += net/
+drivers-y      += virt/
 libs-y         := lib/
 endif # KBUILD_EXTMOD
 
index 22b832f..003309f 100644 (file)
                        qcom,smem-state-names = "ipa-clock-enabled-valid",
                                                "ipa-clock-enabled";
 
-                       modem-remoteproc = <&remoteproc_mpss>;
-
                        status = "disabled";
                };
 
index bcf8883..04b2490 100644 (file)
                        qcom,smem-state-names = "ipa-clock-enabled-valid",
                                                "ipa-clock-enabled";
 
-                       modem-remoteproc = <&mss_pil>;
-
                        status = "disabled";
                };
 
index 260f9f4..1ebb4b9 100644 (file)
@@ -42,6 +42,7 @@ config BONDING
        tristate "Bonding driver support"
        depends on INET
        depends on IPV6 || IPV6=n
+       depends on TLS || TLS_DEVICE=n
        help
          Say 'Y' or 'M' if you wish to be able to 'bond' multiple Ethernet
          Channels together. This is called 'Etherchannel' by Cisco,
index 01e4a19..d9281ae 100644 (file)
@@ -74,7 +74,7 @@ static int can_rx_state_to_frame(struct net_device *dev, enum can_state state)
        }
 }
 
-static const char *can_get_state_str(const enum can_state state)
+const char *can_get_state_str(const enum can_state state)
 {
        switch (state) {
        case CAN_STATE_ERROR_ACTIVE:
@@ -95,6 +95,7 @@ static const char *can_get_state_str(const enum can_state state)
 
        return "<unknown>";
 }
+EXPORT_SYMBOL_GPL(can_get_state_str);
 
 void can_change_state(struct net_device *dev, struct can_frame *cf,
                      enum can_state tx_state, enum can_state rx_state)
index d35c4e8..b48140b 100644 (file)
@@ -27,12 +27,17 @@ static const u8 len2dlc[] = {
        13, 13, 13, 13, 13, 13, 13, 13, /* 25 - 32 */
        14, 14, 14, 14, 14, 14, 14, 14, /* 33 - 40 */
        14, 14, 14, 14, 14, 14, 14, 14, /* 41 - 48 */
+       15, 15, 15, 15, 15, 15, 15, 15, /* 49 - 56 */
+       15, 15, 15, 15, 15, 15, 15, 15  /* 57 - 64 */
 };
 
 /* map the sanitized data length to an appropriate data length code */
 u8 can_fd_len2dlc(u8 len)
 {
-       if (len >= ARRAY_SIZE(len2dlc))
+       /* check for length mapping table size at build time */
+       BUILD_BUG_ON(ARRAY_SIZE(len2dlc) != CANFD_MAX_DLEN + 1);
+
+       if (unlikely(len > CANFD_MAX_DLEN))
                return CANFD_MAX_DLC;
 
        return len2dlc[len];
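The hunk above extends the length-to-DLC table to cover CAN FD payloads up to 64 bytes and adds a build-time check that the table has exactly CANFD_MAX_DLEN + 1 entries. As a minimal, self-contained illustration (a userspace sketch, not the kernel helper itself; the function name fd_len2dlc is made up for this example), the same mapping can be expressed as a step table: lengths 0..8 map 1:1, longer frames round up to the next valid CAN FD size (12, 16, 20, 24, 32, 48, 64 bytes) encoded as DLC 9..15:

#include <stdio.h>

#define CANFD_MAX_DLEN 64
#define CANFD_MAX_DLC  15

/* illustrative stand-in for the kernel's can_fd_len2dlc() lookup */
static unsigned char fd_len2dlc(unsigned char len)
{
	/* valid CAN FD payload sizes above 8 bytes and their DLC codes */
	static const unsigned char steps[][2] = {
		{12, 9}, {16, 10}, {20, 11}, {24, 12}, {32, 13}, {48, 14}, {64, 15},
	};
	unsigned int i;

	if (len <= 8)
		return len;
	if (len > CANFD_MAX_DLEN)
		return CANFD_MAX_DLC;

	for (i = 0; i < sizeof(steps) / sizeof(steps[0]); i++)
		if (len <= steps[i][0])
			return steps[i][1];

	return CANFD_MAX_DLC;	/* not reached */
}

int main(void)
{
	unsigned int len;

	/* prints the full length -> DLC mapping for inspection */
	for (len = 0; len <= 64; len++)
		printf("len %2u -> dlc %u\n", len,
		       (unsigned int)fd_len2dlc((unsigned char)len));

	return 0;
}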
index 5d9157c..971ada3 100644 (file)
@@ -1975,14 +1975,14 @@ static int flexcan_setup_stop_mode_scfw(struct platform_device *pdev)
        priv = netdev_priv(dev);
        priv->scu_idx = scu_idx;
 
-       /* this function could be defered probe, return -EPROBE_DEFER */
+       /* this function could be deferred probe, return -EPROBE_DEFER */
        return imx_scu_get_handle(&priv->sc_ipc_handle);
 }
 
 /* flexcan_setup_stop_mode - Setup stop mode for wakeup
  *
  * Return: = 0 setup stop mode successfully or doesn't support this feature
- *         < 0 fail to setup stop mode (could be defered probe)
+ *         < 0 fail to setup stop mode (could be deferred probe)
  */
 static int flexcan_setup_stop_mode(struct platform_device *pdev)
 {
index 00e9855..3638b47 100644 (file)
@@ -335,6 +335,8 @@ static void mcp251xfd_ring_init(struct mcp251xfd_priv *priv)
        u8 len;
        int i, j;
 
+       netdev_reset_queue(priv->ndev);
+
        /* TEF */
        tef_ring = priv->tef;
        tef_ring->head = 0;
@@ -1249,7 +1251,8 @@ mcp251xfd_handle_tefif_recover(const struct mcp251xfd_priv *priv, const u32 seq)
 
 static int
 mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv,
-                          const struct mcp251xfd_hw_tef_obj *hw_tef_obj)
+                          const struct mcp251xfd_hw_tef_obj *hw_tef_obj,
+                          unsigned int *frame_len_ptr)
 {
        struct net_device_stats *stats = &priv->ndev->stats;
        u32 seq, seq_masked, tef_tail_masked;
@@ -1271,7 +1274,8 @@ mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv,
        stats->tx_bytes +=
                can_rx_offload_get_echo_skb(&priv->offload,
                                            mcp251xfd_get_tef_tail(priv),
-                                           hw_tef_obj->ts, NULL);
+                                           hw_tef_obj->ts,
+                                           frame_len_ptr);
        stats->tx_packets++;
        priv->tef->tail++;
 
@@ -1308,6 +1312,7 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv,
                       const u8 offset, const u8 len)
 {
        const struct mcp251xfd_tx_ring *tx_ring = priv->tx;
+       const int val_bytes = regmap_get_val_bytes(priv->map_rx);
 
        if (IS_ENABLED(CONFIG_CAN_MCP251XFD_SANITY) &&
            (offset > tx_ring->obj_num ||
@@ -1322,12 +1327,13 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv,
        return regmap_bulk_read(priv->map_rx,
                                mcp251xfd_get_tef_obj_addr(offset),
                                hw_tef_obj,
-                               sizeof(*hw_tef_obj) / sizeof(u32) * len);
+                               sizeof(*hw_tef_obj) / val_bytes * len);
 }
 
 static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
 {
        struct mcp251xfd_hw_tef_obj hw_tef_obj[MCP251XFD_TX_OBJ_NUM_MAX];
+       unsigned int total_frame_len = 0;
        u8 tef_tail, len, l;
        int err, i;
 
@@ -1349,7 +1355,9 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
        }
 
        for (i = 0; i < len; i++) {
-               err = mcp251xfd_handle_tefif_one(priv, &hw_tef_obj[i]);
+               unsigned int frame_len;
+
+               err = mcp251xfd_handle_tefif_one(priv, &hw_tef_obj[i], &frame_len);
                /* -EAGAIN means the Sequence Number in the TEF
                 * doesn't match our tef_tail. This can happen if we
                 * read the TEF objects too early. Leave loop let the
@@ -1359,6 +1367,8 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
                        goto out_netif_wake_queue;
                if (err)
                        return err;
+
+               total_frame_len += frame_len;
        }
 
  out_netif_wake_queue:
@@ -1389,6 +1399,7 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
                        return err;
 
                tx_ring->tail += len;
+               netdev_completed_queue(priv->ndev, len, total_frame_len);
 
                err = mcp251xfd_check_tef_tail(priv);
                if (err)
@@ -1438,6 +1449,7 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv,
                           struct sk_buff *skb)
 {
        struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
+       u8 dlc;
 
        if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_IDE) {
                u32 sid, eid;
@@ -1453,9 +1465,10 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv,
                                        hw_rx_obj->id);
        }
 
+       dlc = FIELD_GET(MCP251XFD_OBJ_FLAGS_DLC, hw_rx_obj->flags);
+
        /* CANFD */
        if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_FDF) {
-               u8 dlc;
 
                if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_ESI)
                        cfd->flags |= CANFD_ESI;
@@ -1463,17 +1476,17 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv,
                if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_BRS)
                        cfd->flags |= CANFD_BRS;
 
-               dlc = FIELD_GET(MCP251XFD_OBJ_FLAGS_DLC, hw_rx_obj->flags);
                cfd->len = can_fd_dlc2len(dlc);
        } else {
                if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR)
                        cfd->can_id |= CAN_RTR_FLAG;
 
-               cfd->len = can_cc_dlc2len(FIELD_GET(MCP251XFD_OBJ_FLAGS_DLC,
-                                                hw_rx_obj->flags));
+               can_frame_set_cc_len((struct can_frame *)cfd, dlc,
+                                    priv->can.ctrlmode);
        }
 
-       memcpy(cfd->data, hw_rx_obj->data, cfd->len);
+       if (!(hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR))
+               memcpy(cfd->data, hw_rx_obj->data, cfd->len);
 }
 
 static int
@@ -1510,12 +1523,13 @@ mcp251xfd_rx_obj_read(const struct mcp251xfd_priv *priv,
                      struct mcp251xfd_hw_rx_obj_canfd *hw_rx_obj,
                      const u8 offset, const u8 len)
 {
+       const int val_bytes = regmap_get_val_bytes(priv->map_rx);
        int err;
 
        err = regmap_bulk_read(priv->map_rx,
                               mcp251xfd_get_rx_obj_addr(ring, offset),
                               hw_rx_obj,
-                              len * ring->obj_size / sizeof(u32));
+                              len * ring->obj_size / val_bytes);
 
        return err;
 }
@@ -2137,6 +2151,7 @@ static int mcp251xfd_handle_spicrcif(struct mcp251xfd_priv *priv)
 static irqreturn_t mcp251xfd_irq(int irq, void *dev_id)
 {
        struct mcp251xfd_priv *priv = dev_id;
+       const int val_bytes = regmap_get_val_bytes(priv->map_reg);
        irqreturn_t handled = IRQ_NONE;
        int err;
 
@@ -2162,7 +2177,7 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id)
                err = regmap_bulk_read(priv->map_reg, MCP251XFD_REG_INT,
                                       &priv->regs_status,
                                       sizeof(priv->regs_status) /
-                                      sizeof(u32));
+                                      val_bytes);
                if (err)
                        goto out_fail;
 
@@ -2300,7 +2315,7 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv,
        union mcp251xfd_tx_obj_load_buf *load_buf;
        u8 dlc;
        u32 id, flags;
-       int offset, len;
+       int len_sanitized = 0, len;
 
        if (cfd->can_id & CAN_EFF_FLAG) {
                u32 sid, eid;
@@ -2321,12 +2336,12 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv,
         * harm, only the lower 7 bits will be transferred into the
         * TEF object.
         */
-       dlc = can_fd_len2dlc(cfd->len);
-       flags |= FIELD_PREP(MCP251XFD_OBJ_FLAGS_SEQ_MCP2518FD_MASK, seq) |
-               FIELD_PREP(MCP251XFD_OBJ_FLAGS_DLC, dlc);
+       flags |= FIELD_PREP(MCP251XFD_OBJ_FLAGS_SEQ_MCP2518FD_MASK, seq);
 
        if (cfd->can_id & CAN_RTR_FLAG)
                flags |= MCP251XFD_OBJ_FLAGS_RTR;
+       else
+               len_sanitized = canfd_sanitize_len(cfd->len);
 
        /* CANFD */
        if (can_is_canfd_skb(skb)) {
@@ -2337,8 +2352,15 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv,
 
                if (cfd->flags & CANFD_BRS)
                        flags |= MCP251XFD_OBJ_FLAGS_BRS;
+
+               dlc = can_fd_len2dlc(cfd->len);
+       } else {
+               dlc = can_get_cc_dlc((struct can_frame *)cfd,
+                                    priv->can.ctrlmode);
        }
 
+       flags |= FIELD_PREP(MCP251XFD_OBJ_FLAGS_DLC, dlc);
+
        load_buf = &tx_obj->buf;
        if (priv->devtype_data.quirks & MCP251XFD_QUIRK_CRC_TX)
                hw_tx_obj = &load_buf->crc.hw_tx_obj;
@@ -2348,17 +2370,22 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv,
        put_unaligned_le32(id, &hw_tx_obj->id);
        put_unaligned_le32(flags, &hw_tx_obj->flags);
 
-       /* Clear data at end of CAN frame */
-       offset = round_down(cfd->len, sizeof(u32));
-       len = round_up(can_fd_dlc2len(dlc), sizeof(u32)) - offset;
-       if (MCP251XFD_SANITIZE_CAN && len)
-               memset(hw_tx_obj->data + offset, 0x0, len);
+       /* Copy data */
        memcpy(hw_tx_obj->data, cfd->data, cfd->len);
 
+       /* Clear unused data at end of CAN frame */
+       if (MCP251XFD_SANITIZE_CAN && len_sanitized) {
+               int pad_len;
+
+               pad_len = len_sanitized - cfd->len;
+               if (pad_len)
+                       memset(hw_tx_obj->data + cfd->len, 0x0, pad_len);
+       }
+
        /* Number of bytes to be written into the RAM of the controller */
        len = sizeof(hw_tx_obj->id) + sizeof(hw_tx_obj->flags);
        if (MCP251XFD_SANITIZE_CAN)
-               len += round_up(can_fd_dlc2len(dlc), sizeof(u32));
+               len += round_up(len_sanitized, sizeof(u32));
        else
                len += round_up(cfd->len, sizeof(u32));
 
@@ -2418,6 +2445,7 @@ static netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
        struct mcp251xfd_priv *priv = netdev_priv(ndev);
        struct mcp251xfd_tx_ring *tx_ring = priv->tx;
        struct mcp251xfd_tx_obj *tx_obj;
+       unsigned int frame_len;
        u8 tx_head;
        int err;
 
@@ -2433,10 +2461,12 @@ static netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
        /* Stop queue if we occupy the complete TX FIFO */
        tx_head = mcp251xfd_get_tx_head(tx_ring);
        tx_ring->head++;
-       if (tx_ring->head - tx_ring->tail >= tx_ring->obj_num)
+       if (mcp251xfd_get_tx_free(tx_ring) == 0)
                netif_stop_queue(ndev);
 
-       can_put_echo_skb(skb, ndev, tx_head, 0);
+       frame_len = can_skb_get_frame_len(skb);
+       can_put_echo_skb(skb, ndev, tx_head, frame_len);
+       netdev_sent_queue(priv->ndev, frame_len);
 
        err = mcp251xfd_tx_obj_write(priv, tx_obj);
        if (err)
@@ -2885,7 +2915,8 @@ static int mcp251xfd_probe(struct spi_device *spi)
        priv->can.data_bittiming_const = &mcp251xfd_data_bittiming_const;
        priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK |
                CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_BERR_REPORTING |
-               CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO;
+               CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO |
+               CAN_CTRLMODE_CC_LEN8_DLC;
        priv->ndev = ndev;
        priv->spi = spi;
        priv->rx_int = rx_int;
index 4232a71..1f649d1 100644 (file)
@@ -466,7 +466,7 @@ static void mcba_usb_process_ka_usb(struct mcba_priv *priv,
                                    struct mcba_usb_msg_ka_usb *msg)
 {
        if (unlikely(priv->usb_ka_first_pass)) {
-               netdev_info(priv->netdev, "PIC USB version %hhu.%hhu\n",
+               netdev_info(priv->netdev, "PIC USB version %u.%u\n",
                            msg->soft_ver_major, msg->soft_ver_minor);
 
                priv->usb_ka_first_pass = false;
@@ -492,7 +492,7 @@ static void mcba_usb_process_ka_can(struct mcba_priv *priv,
                                    struct mcba_usb_msg_ka_can *msg)
 {
        if (unlikely(priv->can_ka_first_pass)) {
-               netdev_info(priv->netdev, "PIC CAN version %hhu.%hhu\n",
+               netdev_info(priv->netdev, "PIC CAN version %u.%u\n",
                            msg->soft_ver_major, msg->soft_ver_minor);
 
                priv->can_ka_first_pass = false;
@@ -554,7 +554,7 @@ static void mcba_usb_process_rx(struct mcba_priv *priv,
                break;
 
        default:
-               netdev_warn(priv->netdev, "Unsupported msg (0x%hhX)",
+               netdev_warn(priv->netdev, "Unsupported msg (0x%X)",
                            msg->cmd_id);
                break;
        }
index 9a1921e..4cc51fb 100644 (file)
@@ -3,7 +3,7 @@
  * DSA driver for:
  * Hirschmann Hellcreek TSN switch.
  *
- * Copyright (C) 2019,2020 Linutronix GmbH
+ * Copyright (C) 2019-2021 Linutronix GmbH
  * Author Kurt Kanzenbach <kurt@linutronix.de>
  */
 
@@ -153,6 +153,13 @@ static void hellcreek_select_vlan(struct hellcreek *hellcreek, int vid,
        hellcreek_write(hellcreek, val, HR_VIDCFG);
 }
 
+static void hellcreek_select_tgd(struct hellcreek *hellcreek, int port)
+{
+       u16 val = port << TR_TGDSEL_TDGSEL_SHIFT;
+
+       hellcreek_write(hellcreek, val, TR_TGDSEL);
+}
+
 static int hellcreek_wait_until_ready(struct hellcreek *hellcreek)
 {
        u16 val;
@@ -1125,6 +1132,296 @@ out:
        return ret;
 }
 
+static void hellcreek_setup_gcl(struct hellcreek *hellcreek, int port,
+                               const struct tc_taprio_qopt_offload *schedule)
+{
+       const struct tc_taprio_sched_entry *cur, *initial, *next;
+       size_t i;
+
+       cur = initial = &schedule->entries[0];
+       next = cur + 1;
+
+       for (i = 1; i <= schedule->num_entries; ++i) {
+               u16 data;
+               u8 gates;
+
+               cur++;
+               next++;
+
+               if (i == schedule->num_entries)
+                       gates = initial->gate_mask ^
+                               cur->gate_mask;
+               else
+                       gates = next->gate_mask ^
+                               cur->gate_mask;
+
+               data = gates;
+
+               if (i == schedule->num_entries)
+                       data |= TR_GCLDAT_GCLWRLAST;
+
+               /* Gates states */
+               hellcreek_write(hellcreek, data, TR_GCLDAT);
+
+               /* Time interval */
+               hellcreek_write(hellcreek,
+                               cur->interval & 0x0000ffff,
+                               TR_GCLTIL);
+               hellcreek_write(hellcreek,
+                               (cur->interval & 0xffff0000) >> 16,
+                               TR_GCLTIH);
+
+               /* Commit entry */
+               data = ((i - 1) << TR_GCLCMD_GCLWRADR_SHIFT) |
+                       (initial->gate_mask <<
+                        TR_GCLCMD_INIT_GATE_STATES_SHIFT);
+               hellcreek_write(hellcreek, data, TR_GCLCMD);
+       }
+}
+
+static void hellcreek_set_cycle_time(struct hellcreek *hellcreek,
+                                    const struct tc_taprio_qopt_offload *schedule)
+{
+       u32 cycle_time = schedule->cycle_time;
+
+       hellcreek_write(hellcreek, cycle_time & 0x0000ffff, TR_CTWRL);
+       hellcreek_write(hellcreek, (cycle_time & 0xffff0000) >> 16, TR_CTWRH);
+}
+
+static void hellcreek_switch_schedule(struct hellcreek *hellcreek,
+                                     ktime_t start_time)
+{
+       struct timespec64 ts = ktime_to_timespec64(start_time);
+
+       /* Start schedule at this point of time */
+       hellcreek_write(hellcreek, ts.tv_nsec & 0x0000ffff, TR_ESTWRL);
+       hellcreek_write(hellcreek, (ts.tv_nsec & 0xffff0000) >> 16, TR_ESTWRH);
+
+       /* Arm timer, set seconds and switch schedule */
+       hellcreek_write(hellcreek, TR_ESTCMD_ESTARM | TR_ESTCMD_ESTSWCFG |
+                       ((ts.tv_sec & TR_ESTCMD_ESTSEC_MASK) <<
+                        TR_ESTCMD_ESTSEC_SHIFT), TR_ESTCMD);
+}
+
+static bool hellcreek_schedule_startable(struct hellcreek *hellcreek, int port)
+{
+       struct hellcreek_port *hellcreek_port = &hellcreek->ports[port];
+       s64 base_time_ns, current_ns;
+
+       /* The switch only allows a schedule to be started up to eight seconds
+        * in the future. Therefore, check against the current PTP time whether
+        * the schedule is startable yet.
+        */
+
+       /* Use the "cached" time. That should be alright, as it's updated quite
+        * frequently in the PTP code.
+        */
+       mutex_lock(&hellcreek->ptp_lock);
+       current_ns = hellcreek->seconds * NSEC_PER_SEC + hellcreek->last_ts;
+       mutex_unlock(&hellcreek->ptp_lock);
+
+       /* Calculate difference to admin base time */
+       base_time_ns = ktime_to_ns(hellcreek_port->current_schedule->base_time);
+
+       return base_time_ns - current_ns < (s64)8 * NSEC_PER_SEC;
+}
+
+static void hellcreek_start_schedule(struct hellcreek *hellcreek, int port)
+{
+       struct hellcreek_port *hellcreek_port = &hellcreek->ports[port];
+       ktime_t base_time, current_time;
+       s64 current_ns;
+       u32 cycle_time;
+
+       /* First select port */
+       hellcreek_select_tgd(hellcreek, port);
+
+       /* Forward base time into the future if needed */
+       mutex_lock(&hellcreek->ptp_lock);
+       current_ns = hellcreek->seconds * NSEC_PER_SEC + hellcreek->last_ts;
+       mutex_unlock(&hellcreek->ptp_lock);
+
+       current_time = ns_to_ktime(current_ns);
+       base_time    = hellcreek_port->current_schedule->base_time;
+       cycle_time   = hellcreek_port->current_schedule->cycle_time;
+
+       if (ktime_compare(current_time, base_time) > 0) {
+               s64 n;
+
+               n = div64_s64(ktime_sub_ns(current_time, base_time),
+                             cycle_time);
+               base_time = ktime_add_ns(base_time, (n + 1) * cycle_time);
+       }
+
+       /* Set admin base time and switch schedule */
+       hellcreek_switch_schedule(hellcreek, base_time);
+
+       taprio_offload_free(hellcreek_port->current_schedule);
+       hellcreek_port->current_schedule = NULL;
+
+       dev_dbg(hellcreek->dev, "Armed EST timer for port %d\n",
+               hellcreek_port->port);
+}
+
+static void hellcreek_check_schedule(struct work_struct *work)
+{
+       struct delayed_work *dw = to_delayed_work(work);
+       struct hellcreek_port *hellcreek_port;
+       struct hellcreek *hellcreek;
+       bool startable;
+
+       hellcreek_port = dw_to_hellcreek_port(dw);
+       hellcreek = hellcreek_port->hellcreek;
+
+       mutex_lock(&hellcreek->reg_lock);
+
+       /* Check starting time */
+       startable = hellcreek_schedule_startable(hellcreek,
+                                                hellcreek_port->port);
+       if (startable) {
+               hellcreek_start_schedule(hellcreek, hellcreek_port->port);
+               mutex_unlock(&hellcreek->reg_lock);
+               return;
+       }
+
+       mutex_unlock(&hellcreek->reg_lock);
+
+       /* Reschedule */
+       schedule_delayed_work(&hellcreek_port->schedule_work,
+                             HELLCREEK_SCHEDULE_PERIOD);
+}
+
+static int hellcreek_port_set_schedule(struct dsa_switch *ds, int port,
+                                      struct tc_taprio_qopt_offload *taprio)
+{
+       struct hellcreek *hellcreek = ds->priv;
+       struct hellcreek_port *hellcreek_port;
+       bool startable;
+       u16 ctrl;
+
+       hellcreek_port = &hellcreek->ports[port];
+
+       dev_dbg(hellcreek->dev, "Configure traffic schedule on port %d\n",
+               port);
+
+       /* First cancel delayed work */
+       cancel_delayed_work_sync(&hellcreek_port->schedule_work);
+
+       mutex_lock(&hellcreek->reg_lock);
+
+       if (hellcreek_port->current_schedule) {
+               taprio_offload_free(hellcreek_port->current_schedule);
+               hellcreek_port->current_schedule = NULL;
+       }
+       hellcreek_port->current_schedule = taprio_offload_get(taprio);
+
+       /* Then select port */
+       hellcreek_select_tgd(hellcreek, port);
+
+       /* Enable gating and keep defaults */
+       ctrl = (0xff << TR_TGDCTRL_ADMINGATESTATES_SHIFT) | TR_TGDCTRL_GATE_EN;
+       hellcreek_write(hellcreek, ctrl, TR_TGDCTRL);
+
+       /* Cancel pending schedule */
+       hellcreek_write(hellcreek, 0x00, TR_ESTCMD);
+
+       /* Setup a new schedule */
+       hellcreek_setup_gcl(hellcreek, port, hellcreek_port->current_schedule);
+
+       /* Configure cycle time */
+       hellcreek_set_cycle_time(hellcreek, hellcreek_port->current_schedule);
+
+       /* Check starting time */
+       startable = hellcreek_schedule_startable(hellcreek, port);
+       if (startable) {
+               hellcreek_start_schedule(hellcreek, port);
+               mutex_unlock(&hellcreek->reg_lock);
+               return 0;
+       }
+
+       mutex_unlock(&hellcreek->reg_lock);
+
+       /* Schedule periodic schedule check */
+       schedule_delayed_work(&hellcreek_port->schedule_work,
+                             HELLCREEK_SCHEDULE_PERIOD);
+
+       return 0;
+}
+
+static int hellcreek_port_del_schedule(struct dsa_switch *ds, int port)
+{
+       struct hellcreek *hellcreek = ds->priv;
+       struct hellcreek_port *hellcreek_port;
+
+       hellcreek_port = &hellcreek->ports[port];
+
+       dev_dbg(hellcreek->dev, "Remove traffic schedule on port %d\n", port);
+
+       /* First cancel delayed work */
+       cancel_delayed_work_sync(&hellcreek_port->schedule_work);
+
+       mutex_lock(&hellcreek->reg_lock);
+
+       if (hellcreek_port->current_schedule) {
+               taprio_offload_free(hellcreek_port->current_schedule);
+               hellcreek_port->current_schedule = NULL;
+       }
+
+       /* Then select port */
+       hellcreek_select_tgd(hellcreek, port);
+
+       /* Disable gating and return to regular switching flow */
+       hellcreek_write(hellcreek, 0xff << TR_TGDCTRL_ADMINGATESTATES_SHIFT,
+                       TR_TGDCTRL);
+
+       mutex_unlock(&hellcreek->reg_lock);
+
+       return 0;
+}
+
+static bool hellcreek_validate_schedule(struct hellcreek *hellcreek,
+                                       struct tc_taprio_qopt_offload *schedule)
+{
+       size_t i;
+
+       /* Does this hellcreek version support Qbv in hardware? */
+       if (!hellcreek->pdata->qbv_support)
+               return false;
+
+       /* cycle time can only be 32bit */
+       if (schedule->cycle_time > (u32)-1)
+               return false;
+
+       /* cycle time extension is not supported */
+       if (schedule->cycle_time_extension)
+               return false;
+
+       /* Only set command is supported */
+       for (i = 0; i < schedule->num_entries; ++i)
+               if (schedule->entries[i].command != TC_TAPRIO_CMD_SET_GATES)
+                       return false;
+
+       return true;
+}
+
+static int hellcreek_port_setup_tc(struct dsa_switch *ds, int port,
+                                  enum tc_setup_type type, void *type_data)
+{
+       struct tc_taprio_qopt_offload *taprio = type_data;
+       struct hellcreek *hellcreek = ds->priv;
+
+       if (type != TC_SETUP_QDISC_TAPRIO)
+               return -EOPNOTSUPP;
+
+       if (!hellcreek_validate_schedule(hellcreek, taprio))
+               return -EOPNOTSUPP;
+
+       if (taprio->enable)
+               return hellcreek_port_set_schedule(ds, port, taprio);
+
+       return hellcreek_port_del_schedule(ds, port);
+}
+
 static const struct dsa_switch_ops hellcreek_ds_ops = {
        .get_ethtool_stats   = hellcreek_get_ethtool_stats,
        .get_sset_count      = hellcreek_get_sset_count,
@@ -1143,6 +1440,7 @@ static const struct dsa_switch_ops hellcreek_ds_ops = {
        .port_hwtstamp_get   = hellcreek_port_hwtstamp_get,
        .port_prechangeupper = hellcreek_port_prechangeupper,
        .port_rxtstamp       = hellcreek_port_rxtstamp,
+       .port_setup_tc       = hellcreek_port_setup_tc,
        .port_stp_state_set  = hellcreek_port_stp_state_set,
        .port_txtstamp       = hellcreek_port_txtstamp,
        .port_vlan_add       = hellcreek_vlan_add,
@@ -1197,6 +1495,9 @@ static int hellcreek_probe(struct platform_device *pdev)
 
                port->hellcreek = hellcreek;
                port->port      = i;
+
+               INIT_DELAYED_WORK(&port->schedule_work,
+                                 hellcreek_check_schedule);
        }
 
        mutex_init(&hellcreek->reg_lock);
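One detail worth calling out from hellcreek_start_schedule() above: if the requested taprio base time already lies in the past, it is advanced by a whole number of cycle times so that the schedule is armed at the next cycle boundary that is still ahead of the current PTP time. A small standalone sketch of that arithmetic, using plain 64-bit nanosecond values instead of the kernel's ktime_t helpers (the function name advance_base_time is illustrative, not kernel API):

#include <stdint.h>
#include <stdio.h>

static int64_t advance_base_time(int64_t base_ns, int64_t now_ns, uint32_t cycle_ns)
{
	int64_t n;

	if (now_ns <= base_ns)
		return base_ns;	/* already in the future, start as requested */

	/* number of whole cycles that have elapsed since base_ns */
	n = (now_ns - base_ns) / cycle_ns;

	/* first cycle start that is still ahead of "now" */
	return base_ns + (n + 1) * (int64_t)cycle_ns;
}

int main(void)
{
	int64_t base = 1000;		/* ns, far in the past */
	int64_t now = 1000000000;	/* ns, current time */
	uint32_t cycle = 200000;	/* 200 us cycle time */

	printf("new base time: %lld ns\n",
	       (long long)advance_base_time(base, now, cycle));
	return 0;
}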
index e81781e..854639f 100644 (file)
@@ -3,7 +3,7 @@
  * DSA driver for:
  * Hirschmann Hellcreek TSN switch.
  *
- * Copyright (C) 2019,2020 Linutronix GmbH
+ * Copyright (C) 2019-2021 Linutronix GmbH
  * Author Kurt Kanzenbach <kurt@linutronix.de>
  */
 
@@ -21,6 +21,7 @@
 #include <linux/ptp_clock_kernel.h>
 #include <linux/timecounter.h>
 #include <net/dsa.h>
+#include <net/pkt_sched.h>
 
 /* Ports:
  *  - 0: CPU
@@ -246,6 +247,10 @@ struct hellcreek_port {
 
        /* Per-port timestamping resources */
        struct hellcreek_port_hwtstamp port_hwtstamp;
+
+       /* Per-port Qbv schedule information */
+       struct tc_taprio_qopt_offload *current_schedule;
+       struct delayed_work schedule_work;
 };
 
 struct hellcreek_fdb_entry {
@@ -283,4 +288,14 @@ struct hellcreek {
        size_t fdb_entries;
 };
 
+/* A Qbv schedule can only be started up to 8 seconds in the future. If the delta
+ * between the base time and the current ptp time is larger than 8 seconds, then
+ * use periodic work to check for the schedule to be started. The delayed work
+ * cannot be armed directly to $base_time - 8 + X, because for large deltas the
+ * PTP frequency matters.
+ */
+#define HELLCREEK_SCHEDULE_PERIOD      (2 * HZ)
+#define dw_to_hellcreek_port(dw)                               \
+       container_of(dw, struct hellcreek_port, schedule_work)
+
 #endif /* _HELLCREEK_H_ */
index d219619..eb13ba7 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
 #include <linux/gpio/consumer.h>
+#include <linux/gpio/driver.h>
 #include <net/dsa.h>
 
 #include "mt7530.h"
@@ -1622,6 +1623,109 @@ mtk_get_tag_protocol(struct dsa_switch *ds, int port,
        }
 }
 
+static inline u32
+mt7530_gpio_to_bit(unsigned int offset)
+{
+       /* Map GPIO offset to register bit
+        * [ 2: 0]  port 0 LED 0..2 as GPIO 0..2
+        * [ 6: 4]  port 1 LED 0..2 as GPIO 3..5
+        * [10: 8]  port 2 LED 0..2 as GPIO 6..8
+        * [14:12]  port 3 LED 0..2 as GPIO 9..11
+        * [18:16]  port 4 LED 0..2 as GPIO 12..14
+        */
+       return BIT(offset + offset / 3);
+}
+
+static int
+mt7530_gpio_get(struct gpio_chip *gc, unsigned int offset)
+{
+       struct mt7530_priv *priv = gpiochip_get_data(gc);
+       u32 bit = mt7530_gpio_to_bit(offset);
+
+       return !!(mt7530_read(priv, MT7530_LED_GPIO_DATA) & bit);
+}
+
+static void
+mt7530_gpio_set(struct gpio_chip *gc, unsigned int offset, int value)
+{
+       struct mt7530_priv *priv = gpiochip_get_data(gc);
+       u32 bit = mt7530_gpio_to_bit(offset);
+
+       if (value)
+               mt7530_set(priv, MT7530_LED_GPIO_DATA, bit);
+       else
+               mt7530_clear(priv, MT7530_LED_GPIO_DATA, bit);
+}
+
+static int
+mt7530_gpio_get_direction(struct gpio_chip *gc, unsigned int offset)
+{
+       struct mt7530_priv *priv = gpiochip_get_data(gc);
+       u32 bit = mt7530_gpio_to_bit(offset);
+
+       return (mt7530_read(priv, MT7530_LED_GPIO_DIR) & bit) ?
+               GPIO_LINE_DIRECTION_OUT : GPIO_LINE_DIRECTION_IN;
+}
+
+static int
+mt7530_gpio_direction_input(struct gpio_chip *gc, unsigned int offset)
+{
+       struct mt7530_priv *priv = gpiochip_get_data(gc);
+       u32 bit = mt7530_gpio_to_bit(offset);
+
+       mt7530_clear(priv, MT7530_LED_GPIO_OE, bit);
+       mt7530_clear(priv, MT7530_LED_GPIO_DIR, bit);
+
+       return 0;
+}
+
+static int
+mt7530_gpio_direction_output(struct gpio_chip *gc, unsigned int offset, int value)
+{
+       struct mt7530_priv *priv = gpiochip_get_data(gc);
+       u32 bit = mt7530_gpio_to_bit(offset);
+
+       mt7530_set(priv, MT7530_LED_GPIO_DIR, bit);
+
+       if (value)
+               mt7530_set(priv, MT7530_LED_GPIO_DATA, bit);
+       else
+               mt7530_clear(priv, MT7530_LED_GPIO_DATA, bit);
+
+       mt7530_set(priv, MT7530_LED_GPIO_OE, bit);
+
+       return 0;
+}
+
+static int
+mt7530_setup_gpio(struct mt7530_priv *priv)
+{
+       struct device *dev = priv->dev;
+       struct gpio_chip *gc;
+
+       gc = devm_kzalloc(dev, sizeof(*gc), GFP_KERNEL);
+       if (!gc)
+               return -ENOMEM;
+
+       mt7530_write(priv, MT7530_LED_GPIO_OE, 0);
+       mt7530_write(priv, MT7530_LED_GPIO_DIR, 0);
+       mt7530_write(priv, MT7530_LED_IO_MODE, 0);
+
+       gc->label = "mt7530";
+       gc->parent = dev;
+       gc->owner = THIS_MODULE;
+       gc->get_direction = mt7530_gpio_get_direction;
+       gc->direction_input = mt7530_gpio_direction_input;
+       gc->direction_output = mt7530_gpio_direction_output;
+       gc->get = mt7530_gpio_get;
+       gc->set = mt7530_gpio_set;
+       gc->base = -1;
+       gc->ngpio = 15;
+       gc->can_sleep = true;
+
+       return devm_gpiochip_add_data(dev, gc, priv);
+}
+
 static int
 mt7530_setup(struct dsa_switch *ds)
 {
@@ -1763,6 +1867,12 @@ mt7530_setup(struct dsa_switch *ds)
                }
        }
 
+       if (of_property_read_bool(priv->dev->of_node, "gpio-controller")) {
+               ret = mt7530_setup_gpio(priv);
+               if (ret)
+                       return ret;
+       }
+
        mt7530_setup_port5(ds, interface);
 
        /* Flush the FDB table */
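A note on the LED/GPIO mapping added above: mt7530_gpio_to_bit() packs the three LED lines of each port into consecutive register bits and leaves every fourth bit unused, which is why the bit index is offset + offset / 3. A quick standalone print-out of that mapping (illustrative only, not driver code):

#include <stdio.h>

int main(void)
{
	unsigned int offset;

	/* 15 GPIOs: 5 ports x 3 LEDs, one gap bit between port groups */
	for (offset = 0; offset < 15; offset++) {
		unsigned int bit = offset + offset / 3;

		printf("GPIO %2u (port %u, LED %u) -> register bit %2u\n",
		       offset, offset / 3, offset % 3, bit);
	}
	return 0;
}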
index 32d8969..64a9bb3 100644 (file)
@@ -554,6 +554,26 @@ enum mt7531_clk_skew {
 #define  MT7531_GPIO12_RG_RXD3_MASK    GENMASK(19, 16)
 #define  MT7531_EXT_P_MDIO_12          (2 << 16)
 
+/* Registers for LED GPIO control (MT7530 only)
+ * All registers follow this pattern:
+ * [ 2: 0]  port 0
+ * [ 6: 4]  port 1
+ * [10: 8]  port 2
+ * [14:12]  port 3
+ * [18:16]  port 4
+ */
+
+/* LED enable, 0: Disable, 1: Enable (Default) */
+#define MT7530_LED_EN                  0x7d00
+/* LED mode, 0: GPIO mode, 1: PHY mode (Default) */
+#define MT7530_LED_IO_MODE             0x7d04
+/* GPIO direction, 0: Input, 1: Output */
+#define MT7530_LED_GPIO_DIR            0x7d10
+/* GPIO output enable, 0: Disable, 1: Enable */
+#define MT7530_LED_GPIO_OE             0x7d14
+/* GPIO value, 0: Low, 1: High */
+#define MT7530_LED_GPIO_DATA           0x7d18
+
 #define MT7530_CREV                    0x7ffc
 #define  CHIP_NAME_SHIFT               16
 #define  MT7530_ID                     0x7530
index 51185e4..b175409 100644 (file)
@@ -25,7 +25,6 @@ config NET_DSA_MV88E6XXX_PTP
        default n
        depends on NET_DSA_MV88E6XXX_GLOBAL2
        depends on PTP_1588_CLOCK
-       imply NETWORK_PHY_TIMESTAMPING
        help
          Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch
          chips that support it.
index 2f97605..5143649 100644 (file)
@@ -4049,8 +4049,8 @@ static const struct mv88e6xxx_ops mv88e6250_ops = {
        .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
        .pot_clear = mv88e6xxx_g2_pot_clear,
        .reset = mv88e6250_g1_reset,
-       .vtu_getnext = mv88e6250_g1_vtu_getnext,
-       .vtu_loadpurge = mv88e6250_g1_vtu_loadpurge,
+       .vtu_getnext = mv88e6185_g1_vtu_getnext,
+       .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
        .avb_ops = &mv88e6352_avb_ops,
        .ptp_ops = &mv88e6250_ptp_ops,
        .phylink_validate = mv88e6065_phylink_validate,
index 80a182c..7c39696 100644 (file)
@@ -336,10 +336,6 @@ int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip,
                             struct mv88e6xxx_vtu_entry *entry);
 int mv88e6185_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip,
                               struct mv88e6xxx_vtu_entry *entry);
-int mv88e6250_g1_vtu_getnext(struct mv88e6xxx_chip *chip,
-                            struct mv88e6xxx_vtu_entry *entry);
-int mv88e6250_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip,
-                              struct mv88e6xxx_vtu_entry *entry);
 int mv88e6352_g1_vtu_getnext(struct mv88e6xxx_chip *chip,
                             struct mv88e6xxx_vtu_entry *entry);
 int mv88e6352_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip,
index 7b96396..ae12c98 100644 (file)
@@ -336,39 +336,6 @@ int mv88e6xxx_g1_vtu_getnext(struct mv88e6xxx_chip *chip,
        return mv88e6xxx_g1_vtu_vid_read(chip, entry);
 }
 
-int mv88e6250_g1_vtu_getnext(struct mv88e6xxx_chip *chip,
-                            struct mv88e6xxx_vtu_entry *entry)
-{
-       u16 val;
-       int err;
-
-       err = mv88e6xxx_g1_vtu_getnext(chip, entry);
-       if (err)
-               return err;
-
-       if (entry->valid) {
-               err = mv88e6185_g1_vtu_data_read(chip, entry);
-               if (err)
-                       return err;
-
-               err = mv88e6185_g1_stu_data_read(chip, entry);
-               if (err)
-                       return err;
-
-               /* VTU DBNum[3:0] are located in VTU Operation 3:0
-                * VTU DBNum[5:4] are located in VTU Operation 9:8
-                */
-               err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_VTU_OP, &val);
-               if (err)
-                       return err;
-
-               entry->fid = val & 0x000f;
-               entry->fid |= (val & 0x0300) >> 4;
-       }
-
-       return 0;
-}
-
 int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip,
                             struct mv88e6xxx_vtu_entry *entry)
 {
@@ -389,7 +356,7 @@ int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip,
                        return err;
 
                /* VTU DBNum[3:0] are located in VTU Operation 3:0
-                * VTU DBNum[7:4] are located in VTU Operation 11:8
+                * VTU DBNum[7:4] ([5:4] for 6250) are located in VTU Operation 11:8 (9:8)
                 */
                err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_VTU_OP, &val);
                if (err)
@@ -397,6 +364,7 @@ int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip,
 
                entry->fid = val & 0x000f;
                entry->fid |= (val & 0x0f00) >> 4;
+               entry->fid &= mv88e6xxx_num_databases(chip) - 1;
        }
 
        return 0;
@@ -466,35 +434,6 @@ int mv88e6390_g1_vtu_getnext(struct mv88e6xxx_chip *chip,
        return 0;
 }
 
-int mv88e6250_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip,
-                              struct mv88e6xxx_vtu_entry *entry)
-{
-       u16 op = MV88E6XXX_G1_VTU_OP_VTU_LOAD_PURGE;
-       int err;
-
-       err = mv88e6xxx_g1_vtu_op_wait(chip);
-       if (err)
-               return err;
-
-       err = mv88e6xxx_g1_vtu_vid_write(chip, entry);
-       if (err)
-               return err;
-
-       if (entry->valid) {
-               err = mv88e6185_g1_vtu_data_write(chip, entry);
-               if (err)
-                       return err;
-
-               /* VTU DBNum[3:0] are located in VTU Operation 3:0
-                * VTU DBNum[5:4] are located in VTU Operation 9:8
-                */
-               op |= entry->fid & 0x000f;
-               op |= (entry->fid & 0x0030) << 4;
-       }
-
-       return mv88e6xxx_g1_vtu_op(chip, op);
-}
-
 int mv88e6185_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip,
                               struct mv88e6xxx_vtu_entry *entry)
 {
@@ -516,6 +455,10 @@ int mv88e6185_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip,
 
                /* VTU DBNum[3:0] are located in VTU Operation 3:0
                 * VTU DBNum[7:4] are located in VTU Operation 11:8
+                *
+                * For the 6250/6220, the latter are really [5:4] and
+                * 9:8, but in those cases bits 7:6 of entry->fid are
+                * 0 since they have num_databases = 64.
                 */
                op |= entry->fid & 0x000f;
                op |= (entry->fid & 0x00f0) << 4;
index de50e8b..ad04660 100644 (file)
@@ -33,7 +33,6 @@ source "drivers/net/ethernet/apple/Kconfig"
 source "drivers/net/ethernet/aquantia/Kconfig"
 source "drivers/net/ethernet/arc/Kconfig"
 source "drivers/net/ethernet/atheros/Kconfig"
-source "drivers/net/ethernet/aurora/Kconfig"
 source "drivers/net/ethernet/broadcom/Kconfig"
 source "drivers/net/ethernet/brocade/Kconfig"
 source "drivers/net/ethernet/cadence/Kconfig"
index f8f38dc..1e7dc8a 100644 (file)
@@ -19,7 +19,6 @@ obj-$(CONFIG_NET_VENDOR_APPLE) += apple/
 obj-$(CONFIG_NET_VENDOR_AQUANTIA) += aquantia/
 obj-$(CONFIG_NET_VENDOR_ARC) += arc/
 obj-$(CONFIG_NET_VENDOR_ATHEROS) += atheros/
-obj-$(CONFIG_NET_VENDOR_AURORA) += aurora/
 obj-$(CONFIG_NET_VENDOR_CADENCE) += cadence/
 obj-$(CONFIG_NET_VENDOR_BROADCOM) += broadcom/
 obj-$(CONFIG_NET_VENDOR_BROCADE) += brocade/
diff --git a/drivers/net/ethernet/aurora/Kconfig b/drivers/net/ethernet/aurora/Kconfig
deleted file mode 100644 (file)
index 9ee30ea..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-config NET_VENDOR_AURORA
-       bool "Aurora VLSI devices"
-       default y
-       help
-         If you have a network (Ethernet) device belonging to this class,
-         say Y.
-
-         Note that the answer to this question doesn't directly affect the
-         kernel: saying N will just cause the configurator to skip all
-         questions about Aurora devices. If you say Y, you will be asked
-         for your specific device in the following questions.
-
-if NET_VENDOR_AURORA
-
-config AURORA_NB8800
-       tristate "Aurora AU-NB8800 support"
-       depends on HAS_DMA
-       select PHYLIB
-       help
-        Support for the AU-NB8800 gigabit Ethernet controller.
-
-endif
diff --git a/drivers/net/ethernet/aurora/Makefile b/drivers/net/ethernet/aurora/Makefile
deleted file mode 100644 (file)
index f3d5998..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_AURORA_NB8800) += nb8800.o
diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c
deleted file mode 100644 (file)
index 5b20185..0000000
+++ /dev/null
@@ -1,1520 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2015 Mans Rullgard <mans@mansr.com>
- *
- * Mostly rewritten, based on driver from Sigma Designs.  Original
- * copyright notice below.
- *
- * Driver for tangox SMP864x/SMP865x/SMP867x/SMP868x builtin Ethernet Mac.
- *
- * Copyright (C) 2005 Maxime Bizon <mbizon@freebox.fr>
- */
-
-#include <linux/module.h>
-#include <linux/etherdevice.h>
-#include <linux/delay.h>
-#include <linux/ethtool.h>
-#include <linux/interrupt.h>
-#include <linux/platform_device.h>
-#include <linux/of_device.h>
-#include <linux/of_mdio.h>
-#include <linux/of_net.h>
-#include <linux/dma-mapping.h>
-#include <linux/phy.h>
-#include <linux/cache.h>
-#include <linux/jiffies.h>
-#include <linux/io.h>
-#include <linux/iopoll.h>
-#include <asm/barrier.h>
-
-#include "nb8800.h"
-
-static void nb8800_tx_done(struct net_device *dev);
-static int nb8800_dma_stop(struct net_device *dev);
-
-static inline u8 nb8800_readb(struct nb8800_priv *priv, int reg)
-{
-       return readb_relaxed(priv->base + reg);
-}
-
-static inline u32 nb8800_readl(struct nb8800_priv *priv, int reg)
-{
-       return readl_relaxed(priv->base + reg);
-}
-
-static inline void nb8800_writeb(struct nb8800_priv *priv, int reg, u8 val)
-{
-       writeb_relaxed(val, priv->base + reg);
-}
-
-static inline void nb8800_writew(struct nb8800_priv *priv, int reg, u16 val)
-{
-       writew_relaxed(val, priv->base + reg);
-}
-
-static inline void nb8800_writel(struct nb8800_priv *priv, int reg, u32 val)
-{
-       writel_relaxed(val, priv->base + reg);
-}
-
-static inline void nb8800_maskb(struct nb8800_priv *priv, int reg,
-                               u32 mask, u32 val)
-{
-       u32 old = nb8800_readb(priv, reg);
-       u32 new = (old & ~mask) | (val & mask);
-
-       if (new != old)
-               nb8800_writeb(priv, reg, new);
-}
-
-static inline void nb8800_maskl(struct nb8800_priv *priv, int reg,
-                               u32 mask, u32 val)
-{
-       u32 old = nb8800_readl(priv, reg);
-       u32 new = (old & ~mask) | (val & mask);
-
-       if (new != old)
-               nb8800_writel(priv, reg, new);
-}
-
-static inline void nb8800_modb(struct nb8800_priv *priv, int reg, u8 bits,
-                              bool set)
-{
-       nb8800_maskb(priv, reg, bits, set ? bits : 0);
-}
-
-static inline void nb8800_setb(struct nb8800_priv *priv, int reg, u8 bits)
-{
-       nb8800_maskb(priv, reg, bits, bits);
-}
-
-static inline void nb8800_clearb(struct nb8800_priv *priv, int reg, u8 bits)
-{
-       nb8800_maskb(priv, reg, bits, 0);
-}
-
-static inline void nb8800_modl(struct nb8800_priv *priv, int reg, u32 bits,
-                              bool set)
-{
-       nb8800_maskl(priv, reg, bits, set ? bits : 0);
-}
-
-static inline void nb8800_setl(struct nb8800_priv *priv, int reg, u32 bits)
-{
-       nb8800_maskl(priv, reg, bits, bits);
-}
-
-static inline void nb8800_clearl(struct nb8800_priv *priv, int reg, u32 bits)
-{
-       nb8800_maskl(priv, reg, bits, 0);
-}
-
-static int nb8800_mdio_wait(struct mii_bus *bus)
-{
-       struct nb8800_priv *priv = bus->priv;
-       u32 val;
-
-       return readl_poll_timeout_atomic(priv->base + NB8800_MDIO_CMD,
-                                        val, !(val & MDIO_CMD_GO), 1, 1000);
-}
-
-static int nb8800_mdio_cmd(struct mii_bus *bus, u32 cmd)
-{
-       struct nb8800_priv *priv = bus->priv;
-       int err;
-
-       err = nb8800_mdio_wait(bus);
-       if (err)
-               return err;
-
-       nb8800_writel(priv, NB8800_MDIO_CMD, cmd);
-       udelay(10);
-       nb8800_writel(priv, NB8800_MDIO_CMD, cmd | MDIO_CMD_GO);
-
-       return nb8800_mdio_wait(bus);
-}
-
-static int nb8800_mdio_read(struct mii_bus *bus, int phy_id, int reg)
-{
-       struct nb8800_priv *priv = bus->priv;
-       u32 val;
-       int err;
-
-       err = nb8800_mdio_cmd(bus, MDIO_CMD_ADDR(phy_id) | MDIO_CMD_REG(reg));
-       if (err)
-               return err;
-
-       val = nb8800_readl(priv, NB8800_MDIO_STS);
-       if (val & MDIO_STS_ERR)
-               return 0xffff;
-
-       return val & 0xffff;
-}
-
-static int nb8800_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val)
-{
-       u32 cmd = MDIO_CMD_ADDR(phy_id) | MDIO_CMD_REG(reg) |
-               MDIO_CMD_DATA(val) | MDIO_CMD_WR;
-
-       return nb8800_mdio_cmd(bus, cmd);
-}
-
-static void nb8800_mac_tx(struct net_device *dev, bool enable)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-
-       while (nb8800_readl(priv, NB8800_TXC_CR) & TCR_EN)
-               cpu_relax();
-
-       nb8800_modb(priv, NB8800_TX_CTL1, TX_EN, enable);
-}
-
-static void nb8800_mac_rx(struct net_device *dev, bool enable)
-{
-       nb8800_modb(netdev_priv(dev), NB8800_RX_CTL, RX_EN, enable);
-}
-
-static void nb8800_mac_af(struct net_device *dev, bool enable)
-{
-       nb8800_modb(netdev_priv(dev), NB8800_RX_CTL, RX_AF_EN, enable);
-}
-
-static void nb8800_start_rx(struct net_device *dev)
-{
-       nb8800_setl(netdev_priv(dev), NB8800_RXC_CR, RCR_EN);
-}
-
-static int nb8800_alloc_rx(struct net_device *dev, unsigned int i, bool napi)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-       struct nb8800_rx_desc *rxd = &priv->rx_descs[i];
-       struct nb8800_rx_buf *rxb = &priv->rx_bufs[i];
-       int size = L1_CACHE_ALIGN(RX_BUF_SIZE);
-       dma_addr_t dma_addr;
-       struct page *page;
-       unsigned long offset;
-       void *data;
-
-       data = napi ? napi_alloc_frag(size) : netdev_alloc_frag(size);
-       if (!data)
-               return -ENOMEM;
-
-       page = virt_to_head_page(data);
-       offset = data - page_address(page);
-
-       dma_addr = dma_map_page(&dev->dev, page, offset, RX_BUF_SIZE,
-                               DMA_FROM_DEVICE);
-
-       if (dma_mapping_error(&dev->dev, dma_addr)) {
-               skb_free_frag(data);
-               return -ENOMEM;
-       }
-
-       rxb->page = page;
-       rxb->offset = offset;
-       rxd->desc.s_addr = dma_addr;
-
-       return 0;
-}
-
-static void nb8800_receive(struct net_device *dev, unsigned int i,
-                          unsigned int len)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-       struct nb8800_rx_desc *rxd = &priv->rx_descs[i];
-       struct page *page = priv->rx_bufs[i].page;
-       int offset = priv->rx_bufs[i].offset;
-       void *data = page_address(page) + offset;
-       dma_addr_t dma = rxd->desc.s_addr;
-       struct sk_buff *skb;
-       unsigned int size;
-       int err;
-
-       size = len <= RX_COPYBREAK ? len : RX_COPYHDR;
-
-       skb = napi_alloc_skb(&priv->napi, size);
-       if (!skb) {
-               netdev_err(dev, "rx skb allocation failed\n");
-               dev->stats.rx_dropped++;
-               return;
-       }
-
-       if (len <= RX_COPYBREAK) {
-               dma_sync_single_for_cpu(&dev->dev, dma, len, DMA_FROM_DEVICE);
-               skb_put_data(skb, data, len);
-               dma_sync_single_for_device(&dev->dev, dma, len,
-                                          DMA_FROM_DEVICE);
-       } else {
-               err = nb8800_alloc_rx(dev, i, true);
-               if (err) {
-                       netdev_err(dev, "rx buffer allocation failed\n");
-                       dev->stats.rx_dropped++;
-                       dev_kfree_skb(skb);
-                       return;
-               }
-
-               dma_unmap_page(&dev->dev, dma, RX_BUF_SIZE, DMA_FROM_DEVICE);
-               skb_put_data(skb, data, RX_COPYHDR);
-               skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
-                               offset + RX_COPYHDR, len - RX_COPYHDR,
-                               RX_BUF_SIZE);
-       }
-
-       skb->protocol = eth_type_trans(skb, dev);
-       napi_gro_receive(&priv->napi, skb);
-}
-
-static void nb8800_rx_error(struct net_device *dev, u32 report)
-{
-       if (report & RX_LENGTH_ERR)
-               dev->stats.rx_length_errors++;
-
-       if (report & RX_FCS_ERR)
-               dev->stats.rx_crc_errors++;
-
-       if (report & RX_FIFO_OVERRUN)
-               dev->stats.rx_fifo_errors++;
-
-       if (report & RX_ALIGNMENT_ERROR)
-               dev->stats.rx_frame_errors++;
-
-       dev->stats.rx_errors++;
-}
-
-static int nb8800_poll(struct napi_struct *napi, int budget)
-{
-       struct net_device *dev = napi->dev;
-       struct nb8800_priv *priv = netdev_priv(dev);
-       struct nb8800_rx_desc *rxd;
-       unsigned int last = priv->rx_eoc;
-       unsigned int next;
-       int work = 0;
-
-       nb8800_tx_done(dev);
-
-again:
-       do {
-               unsigned int len;
-
-               next = (last + 1) % RX_DESC_COUNT;
-
-               rxd = &priv->rx_descs[next];
-
-               if (!rxd->report)
-                       break;
-
-               len = RX_BYTES_TRANSFERRED(rxd->report);
-
-               if (IS_RX_ERROR(rxd->report))
-                       nb8800_rx_error(dev, rxd->report);
-               else
-                       nb8800_receive(dev, next, len);
-
-               dev->stats.rx_packets++;
-               dev->stats.rx_bytes += len;
-
-               if (rxd->report & RX_MULTICAST_PKT)
-                       dev->stats.multicast++;
-
-               rxd->report = 0;
-               last = next;
-               work++;
-       } while (work < budget);
-
-       if (work) {
-               priv->rx_descs[last].desc.config |= DESC_EOC;
-               wmb();  /* ensure new EOC is written before clearing old */
-               priv->rx_descs[priv->rx_eoc].desc.config &= ~DESC_EOC;
-               priv->rx_eoc = last;
-               nb8800_start_rx(dev);
-       }
-
-       if (work < budget) {
-               nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_irq);
-
-               /* If a packet arrived after we last checked but
-                * before writing RX_ITR, the interrupt will be
-                * delayed, so we retrieve it now.
-                */
-               if (priv->rx_descs[next].report)
-                       goto again;
-
-               napi_complete_done(napi, work);
-       }
-
-       return work;
-}
-
-static void __nb8800_tx_dma_start(struct net_device *dev)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-       struct nb8800_tx_buf *txb;
-       u32 txc_cr;
-
-       txb = &priv->tx_bufs[priv->tx_queue];
-       if (!txb->ready)
-               return;
-
-       txc_cr = nb8800_readl(priv, NB8800_TXC_CR);
-       if (txc_cr & TCR_EN)
-               return;
-
-       nb8800_writel(priv, NB8800_TX_DESC_ADDR, txb->dma_desc);
-       wmb();          /* ensure desc addr is written before starting DMA */
-       nb8800_writel(priv, NB8800_TXC_CR, txc_cr | TCR_EN);
-
-       priv->tx_queue = (priv->tx_queue + txb->chain_len) % TX_DESC_COUNT;
-}
-
-static void nb8800_tx_dma_start(struct net_device *dev)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-
-       spin_lock_irq(&priv->tx_lock);
-       __nb8800_tx_dma_start(dev);
-       spin_unlock_irq(&priv->tx_lock);
-}
-
-static void nb8800_tx_dma_start_irq(struct net_device *dev)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-
-       spin_lock(&priv->tx_lock);
-       __nb8800_tx_dma_start(dev);
-       spin_unlock(&priv->tx_lock);
-}
-
-static netdev_tx_t nb8800_xmit(struct sk_buff *skb, struct net_device *dev)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-       struct nb8800_tx_desc *txd;
-       struct nb8800_tx_buf *txb;
-       struct nb8800_dma_desc *desc;
-       dma_addr_t dma_addr;
-       unsigned int dma_len;
-       unsigned int align;
-       unsigned int next;
-       bool xmit_more;
-
-       if (atomic_read(&priv->tx_free) <= NB8800_DESC_LOW) {
-               netif_stop_queue(dev);
-               return NETDEV_TX_BUSY;
-       }
-
-       align = (8 - (uintptr_t)skb->data) & 7;
-
-       dma_len = skb->len - align;
-       dma_addr = dma_map_single(&dev->dev, skb->data + align,
-                                 dma_len, DMA_TO_DEVICE);
-
-       if (dma_mapping_error(&dev->dev, dma_addr)) {
-               netdev_err(dev, "tx dma mapping error\n");
-               kfree_skb(skb);
-               dev->stats.tx_dropped++;
-               return NETDEV_TX_OK;
-       }
-
-       xmit_more = netdev_xmit_more();
-       if (atomic_dec_return(&priv->tx_free) <= NB8800_DESC_LOW) {
-               netif_stop_queue(dev);
-               xmit_more = false;
-       }
-
-       next = priv->tx_next;
-       txb = &priv->tx_bufs[next];
-       txd = &priv->tx_descs[next];
-       desc = &txd->desc[0];
-
-       next = (next + 1) % TX_DESC_COUNT;
-
-       if (align) {
-               memcpy(txd->buf, skb->data, align);
-
-               desc->s_addr =
-                       txb->dma_desc + offsetof(struct nb8800_tx_desc, buf);
-               desc->n_addr = txb->dma_desc + sizeof(txd->desc[0]);
-               desc->config = DESC_BTS(2) | DESC_DS | align;
-
-               desc++;
-       }
-
-       desc->s_addr = dma_addr;
-       desc->n_addr = priv->tx_bufs[next].dma_desc;
-       desc->config = DESC_BTS(2) | DESC_DS | DESC_EOF | dma_len;
-
-       if (!xmit_more)
-               desc->config |= DESC_EOC;
-
-       txb->skb = skb;
-       txb->dma_addr = dma_addr;
-       txb->dma_len = dma_len;
-
-       if (!priv->tx_chain) {
-               txb->chain_len = 1;
-               priv->tx_chain = txb;
-       } else {
-               priv->tx_chain->chain_len++;
-       }
-
-       netdev_sent_queue(dev, skb->len);
-
-       priv->tx_next = next;
-
-       if (!xmit_more) {
-               smp_wmb();
-               priv->tx_chain->ready = true;
-               priv->tx_chain = NULL;
-               nb8800_tx_dma_start(dev);
-       }
-
-       return NETDEV_TX_OK;
-}
-
-static void nb8800_tx_error(struct net_device *dev, u32 report)
-{
-       if (report & TX_LATE_COLLISION)
-               dev->stats.collisions++;
-
-       if (report & TX_PACKET_DROPPED)
-               dev->stats.tx_dropped++;
-
-       if (report & TX_FIFO_UNDERRUN)
-               dev->stats.tx_fifo_errors++;
-
-       dev->stats.tx_errors++;
-}
-
-static void nb8800_tx_done(struct net_device *dev)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-       unsigned int limit = priv->tx_next;
-       unsigned int done = priv->tx_done;
-       unsigned int packets = 0;
-       unsigned int len = 0;
-
-       while (done != limit) {
-               struct nb8800_tx_desc *txd = &priv->tx_descs[done];
-               struct nb8800_tx_buf *txb = &priv->tx_bufs[done];
-               struct sk_buff *skb;
-
-               if (!txd->report)
-                       break;
-
-               skb = txb->skb;
-               len += skb->len;
-
-               dma_unmap_single(&dev->dev, txb->dma_addr, txb->dma_len,
-                                DMA_TO_DEVICE);
-
-               if (IS_TX_ERROR(txd->report)) {
-                       nb8800_tx_error(dev, txd->report);
-                       kfree_skb(skb);
-               } else {
-                       consume_skb(skb);
-               }
-
-               dev->stats.tx_packets++;
-               dev->stats.tx_bytes += TX_BYTES_TRANSFERRED(txd->report);
-               dev->stats.collisions += TX_EARLY_COLLISIONS(txd->report);
-
-               txb->skb = NULL;
-               txb->ready = false;
-               txd->report = 0;
-
-               done = (done + 1) % TX_DESC_COUNT;
-               packets++;
-       }
-
-       if (packets) {
-               smp_mb__before_atomic();
-               atomic_add(packets, &priv->tx_free);
-               netdev_completed_queue(dev, packets, len);
-               netif_wake_queue(dev);
-               priv->tx_done = done;
-       }
-}
-
-static irqreturn_t nb8800_irq(int irq, void *dev_id)
-{
-       struct net_device *dev = dev_id;
-       struct nb8800_priv *priv = netdev_priv(dev);
-       irqreturn_t ret = IRQ_NONE;
-       u32 val;
-
-       /* tx interrupt */
-       val = nb8800_readl(priv, NB8800_TXC_SR);
-       if (val) {
-               nb8800_writel(priv, NB8800_TXC_SR, val);
-
-               if (val & TSR_DI)
-                       nb8800_tx_dma_start_irq(dev);
-
-               if (val & TSR_TI)
-                       napi_schedule_irqoff(&priv->napi);
-
-               if (unlikely(val & TSR_DE))
-                       netdev_err(dev, "TX DMA error\n");
-
-               /* should never happen with automatic status retrieval */
-               if (unlikely(val & TSR_TO))
-                       netdev_err(dev, "TX Status FIFO overflow\n");
-
-               ret = IRQ_HANDLED;
-       }
-
-       /* rx interrupt */
-       val = nb8800_readl(priv, NB8800_RXC_SR);
-       if (val) {
-               nb8800_writel(priv, NB8800_RXC_SR, val);
-
-               if (likely(val & (RSR_RI | RSR_DI))) {
-                       nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_poll);
-                       napi_schedule_irqoff(&priv->napi);
-               }
-
-               if (unlikely(val & RSR_DE))
-                       netdev_err(dev, "RX DMA error\n");
-
-               /* should never happen with automatic status retrieval */
-               if (unlikely(val & RSR_RO))
-                       netdev_err(dev, "RX Status FIFO overflow\n");
-
-               ret = IRQ_HANDLED;
-       }
-
-       return ret;
-}
-
-static void nb8800_mac_config(struct net_device *dev)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-       bool gigabit = priv->speed == SPEED_1000;
-       u32 mac_mode_mask = RGMII_MODE | HALF_DUPLEX | GMAC_MODE;
-       u32 mac_mode = 0;
-       u32 slot_time;
-       u32 phy_clk;
-       u32 ict;
-
-       if (!priv->duplex)
-               mac_mode |= HALF_DUPLEX;
-
-       if (gigabit) {
-               if (phy_interface_is_rgmii(dev->phydev))
-                       mac_mode |= RGMII_MODE;
-
-               mac_mode |= GMAC_MODE;
-               phy_clk = 125000000;
-
-               /* Should be 512 but register is only 8 bits */
-               slot_time = 255;
-       } else {
-               phy_clk = 25000000;
-               slot_time = 128;
-       }
-
-       ict = DIV_ROUND_UP(phy_clk, clk_get_rate(priv->clk));
-
-       nb8800_writeb(priv, NB8800_IC_THRESHOLD, ict);
-       nb8800_writeb(priv, NB8800_SLOT_TIME, slot_time);
-       nb8800_maskb(priv, NB8800_MAC_MODE, mac_mode_mask, mac_mode);
-}
-
-static void nb8800_pause_config(struct net_device *dev)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-       struct phy_device *phydev = dev->phydev;
-       u32 rxcr;
-
-       if (priv->pause_aneg) {
-               if (!phydev || !phydev->link)
-                       return;
-
-               priv->pause_rx = phydev->pause;
-               priv->pause_tx = phydev->pause ^ phydev->asym_pause;
-       }
-
-       nb8800_modb(priv, NB8800_RX_CTL, RX_PAUSE_EN, priv->pause_rx);
-
-       rxcr = nb8800_readl(priv, NB8800_RXC_CR);
-       if (!!(rxcr & RCR_FL) == priv->pause_tx)
-               return;
-
-       if (netif_running(dev)) {
-               napi_disable(&priv->napi);
-               netif_tx_lock_bh(dev);
-               nb8800_dma_stop(dev);
-               nb8800_modl(priv, NB8800_RXC_CR, RCR_FL, priv->pause_tx);
-               nb8800_start_rx(dev);
-               netif_tx_unlock_bh(dev);
-               napi_enable(&priv->napi);
-       } else {
-               nb8800_modl(priv, NB8800_RXC_CR, RCR_FL, priv->pause_tx);
-       }
-}
-
-static void nb8800_link_reconfigure(struct net_device *dev)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-       struct phy_device *phydev = dev->phydev;
-       int change = 0;
-
-       if (phydev->link) {
-               if (phydev->speed != priv->speed) {
-                       priv->speed = phydev->speed;
-                       change = 1;
-               }
-
-               if (phydev->duplex != priv->duplex) {
-                       priv->duplex = phydev->duplex;
-                       change = 1;
-               }
-
-               if (change)
-                       nb8800_mac_config(dev);
-
-               nb8800_pause_config(dev);
-       }
-
-       if (phydev->link != priv->link) {
-               priv->link = phydev->link;
-               change = 1;
-       }
-
-       if (change)
-               phy_print_status(phydev);
-}
-
-static void nb8800_update_mac_addr(struct net_device *dev)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-       int i;
-
-       for (i = 0; i < ETH_ALEN; i++)
-               nb8800_writeb(priv, NB8800_SRC_ADDR(i), dev->dev_addr[i]);
-
-       for (i = 0; i < ETH_ALEN; i++)
-               nb8800_writeb(priv, NB8800_UC_ADDR(i), dev->dev_addr[i]);
-}
-
-static int nb8800_set_mac_address(struct net_device *dev, void *addr)
-{
-       struct sockaddr *sock = addr;
-
-       if (netif_running(dev))
-               return -EBUSY;
-
-       ether_addr_copy(dev->dev_addr, sock->sa_data);
-       nb8800_update_mac_addr(dev);
-
-       return 0;
-}
-
-static void nb8800_mc_init(struct net_device *dev, int val)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-
-       nb8800_writeb(priv, NB8800_MC_INIT, val);
-       readb_poll_timeout_atomic(priv->base + NB8800_MC_INIT, val, !val,
-                                 1, 1000);
-}
-
-static void nb8800_set_rx_mode(struct net_device *dev)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-       struct netdev_hw_addr *ha;
-       int i;
-
-       if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) {
-               nb8800_mac_af(dev, false);
-               return;
-       }
-
-       nb8800_mac_af(dev, true);
-       nb8800_mc_init(dev, 0);
-
-       netdev_for_each_mc_addr(ha, dev) {
-               for (i = 0; i < ETH_ALEN; i++)
-                       nb8800_writeb(priv, NB8800_MC_ADDR(i), ha->addr[i]);
-
-               nb8800_mc_init(dev, 0xff);
-       }
-}
-
-#define RX_DESC_SIZE (RX_DESC_COUNT * sizeof(struct nb8800_rx_desc))
-#define TX_DESC_SIZE (TX_DESC_COUNT * sizeof(struct nb8800_tx_desc))
-
-static void nb8800_dma_free(struct net_device *dev)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-       unsigned int i;
-
-       if (priv->rx_bufs) {
-               for (i = 0; i < RX_DESC_COUNT; i++)
-                       if (priv->rx_bufs[i].page)
-                               put_page(priv->rx_bufs[i].page);
-
-               kfree(priv->rx_bufs);
-               priv->rx_bufs = NULL;
-       }
-
-       if (priv->tx_bufs) {
-               for (i = 0; i < TX_DESC_COUNT; i++)
-                       kfree_skb(priv->tx_bufs[i].skb);
-
-               kfree(priv->tx_bufs);
-               priv->tx_bufs = NULL;
-       }
-
-       if (priv->rx_descs) {
-               dma_free_coherent(dev->dev.parent, RX_DESC_SIZE, priv->rx_descs,
-                                 priv->rx_desc_dma);
-               priv->rx_descs = NULL;
-       }
-
-       if (priv->tx_descs) {
-               dma_free_coherent(dev->dev.parent, TX_DESC_SIZE, priv->tx_descs,
-                                 priv->tx_desc_dma);
-               priv->tx_descs = NULL;
-       }
-}
-
-static void nb8800_dma_reset(struct net_device *dev)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-       struct nb8800_rx_desc *rxd;
-       struct nb8800_tx_desc *txd;
-       unsigned int i;
-
-       for (i = 0; i < RX_DESC_COUNT; i++) {
-               dma_addr_t rx_dma = priv->rx_desc_dma + i * sizeof(*rxd);
-
-               rxd = &priv->rx_descs[i];
-               rxd->desc.n_addr = rx_dma + sizeof(*rxd);
-               rxd->desc.r_addr =
-                       rx_dma + offsetof(struct nb8800_rx_desc, report);
-               rxd->desc.config = priv->rx_dma_config;
-               rxd->report = 0;
-       }
-
-       rxd->desc.n_addr = priv->rx_desc_dma;
-       rxd->desc.config |= DESC_EOC;
-
-       priv->rx_eoc = RX_DESC_COUNT - 1;
-
-       for (i = 0; i < TX_DESC_COUNT; i++) {
-               struct nb8800_tx_buf *txb = &priv->tx_bufs[i];
-               dma_addr_t r_dma = txb->dma_desc +
-                       offsetof(struct nb8800_tx_desc, report);
-
-               txd = &priv->tx_descs[i];
-               txd->desc[0].r_addr = r_dma;
-               txd->desc[1].r_addr = r_dma;
-               txd->report = 0;
-       }
-
-       priv->tx_next = 0;
-       priv->tx_queue = 0;
-       priv->tx_done = 0;
-       atomic_set(&priv->tx_free, TX_DESC_COUNT);
-
-       nb8800_writel(priv, NB8800_RX_DESC_ADDR, priv->rx_desc_dma);
-
-       wmb();          /* ensure all setup is written before starting */
-}
-
-static int nb8800_dma_init(struct net_device *dev)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-       unsigned int n_rx = RX_DESC_COUNT;
-       unsigned int n_tx = TX_DESC_COUNT;
-       unsigned int i;
-       int err;
-
-       priv->rx_descs = dma_alloc_coherent(dev->dev.parent, RX_DESC_SIZE,
-                                           &priv->rx_desc_dma, GFP_KERNEL);
-       if (!priv->rx_descs)
-               goto err_out;
-
-       priv->rx_bufs = kcalloc(n_rx, sizeof(*priv->rx_bufs), GFP_KERNEL);
-       if (!priv->rx_bufs)
-               goto err_out;
-
-       for (i = 0; i < n_rx; i++) {
-               err = nb8800_alloc_rx(dev, i, false);
-               if (err)
-                       goto err_out;
-       }
-
-       priv->tx_descs = dma_alloc_coherent(dev->dev.parent, TX_DESC_SIZE,
-                                           &priv->tx_desc_dma, GFP_KERNEL);
-       if (!priv->tx_descs)
-               goto err_out;
-
-       priv->tx_bufs = kcalloc(n_tx, sizeof(*priv->tx_bufs), GFP_KERNEL);
-       if (!priv->tx_bufs)
-               goto err_out;
-
-       for (i = 0; i < n_tx; i++)
-               priv->tx_bufs[i].dma_desc =
-                       priv->tx_desc_dma + i * sizeof(struct nb8800_tx_desc);
-
-       nb8800_dma_reset(dev);
-
-       return 0;
-
-err_out:
-       nb8800_dma_free(dev);
-
-       return -ENOMEM;
-}
-
-static int nb8800_dma_stop(struct net_device *dev)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-       struct nb8800_tx_buf *txb = &priv->tx_bufs[0];
-       struct nb8800_tx_desc *txd = &priv->tx_descs[0];
-       int retry = 5;
-       u32 txcr;
-       u32 rxcr;
-       int err;
-       unsigned int i;
-
-       /* wait for tx to finish */
-       err = readl_poll_timeout_atomic(priv->base + NB8800_TXC_CR, txcr,
-                                       !(txcr & TCR_EN) &&
-                                       priv->tx_done == priv->tx_next,
-                                       1000, 1000000);
-       if (err)
-               return err;
-
-       /* The rx DMA only stops if it reaches the end of chain.
-        * To make this happen, we set the EOC flag on all rx
-        * descriptors, put the device in loopback mode, and send
-        * a few dummy frames.  The interrupt handler will ignore
-        * these since NAPI is disabled and no real frames are in
-        * the tx queue.
-        */
-
-       for (i = 0; i < RX_DESC_COUNT; i++)
-               priv->rx_descs[i].desc.config |= DESC_EOC;
-
-       txd->desc[0].s_addr =
-               txb->dma_desc + offsetof(struct nb8800_tx_desc, buf);
-       txd->desc[0].config = DESC_BTS(2) | DESC_DS | DESC_EOF | DESC_EOC | 8;
-       memset(txd->buf, 0, sizeof(txd->buf));
-
-       nb8800_mac_af(dev, false);
-       nb8800_setb(priv, NB8800_MAC_MODE, LOOPBACK_EN);
-
-       do {
-               nb8800_writel(priv, NB8800_TX_DESC_ADDR, txb->dma_desc);
-               wmb();
-               nb8800_writel(priv, NB8800_TXC_CR, txcr | TCR_EN);
-
-               err = readl_poll_timeout_atomic(priv->base + NB8800_RXC_CR,
-                                               rxcr, !(rxcr & RCR_EN),
-                                               1000, 100000);
-       } while (err && --retry);
-
-       nb8800_mac_af(dev, true);
-       nb8800_clearb(priv, NB8800_MAC_MODE, LOOPBACK_EN);
-       nb8800_dma_reset(dev);
-
-       return retry ? 0 : -ETIMEDOUT;
-}
-
-static void nb8800_pause_adv(struct net_device *dev)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-       struct phy_device *phydev = dev->phydev;
-
-       if (!phydev)
-               return;
-
-       phy_set_asym_pause(phydev, priv->pause_rx, priv->pause_tx);
-}
-
-static int nb8800_open(struct net_device *dev)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-       struct phy_device *phydev;
-       int err;
-
-       /* clear any pending interrupts */
-       nb8800_writel(priv, NB8800_RXC_SR, 0xf);
-       nb8800_writel(priv, NB8800_TXC_SR, 0xf);
-
-       err = nb8800_dma_init(dev);
-       if (err)
-               return err;
-
-       err = request_irq(dev->irq, nb8800_irq, 0, dev_name(&dev->dev), dev);
-       if (err)
-               goto err_free_dma;
-
-       nb8800_mac_rx(dev, true);
-       nb8800_mac_tx(dev, true);
-
-       phydev = of_phy_connect(dev, priv->phy_node,
-                               nb8800_link_reconfigure, 0,
-                               priv->phy_mode);
-       if (!phydev) {
-               err = -ENODEV;
-               goto err_free_irq;
-       }
-
-       nb8800_pause_adv(dev);
-
-       netdev_reset_queue(dev);
-       napi_enable(&priv->napi);
-       netif_start_queue(dev);
-
-       nb8800_start_rx(dev);
-       phy_start(phydev);
-
-       return 0;
-
-err_free_irq:
-       free_irq(dev->irq, dev);
-err_free_dma:
-       nb8800_dma_free(dev);
-
-       return err;
-}
-
-static int nb8800_stop(struct net_device *dev)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-       struct phy_device *phydev = dev->phydev;
-
-       phy_stop(phydev);
-
-       netif_stop_queue(dev);
-       napi_disable(&priv->napi);
-
-       nb8800_dma_stop(dev);
-       nb8800_mac_rx(dev, false);
-       nb8800_mac_tx(dev, false);
-
-       phy_disconnect(phydev);
-
-       free_irq(dev->irq, dev);
-
-       nb8800_dma_free(dev);
-
-       return 0;
-}
-
-static const struct net_device_ops nb8800_netdev_ops = {
-       .ndo_open               = nb8800_open,
-       .ndo_stop               = nb8800_stop,
-       .ndo_start_xmit         = nb8800_xmit,
-       .ndo_set_mac_address    = nb8800_set_mac_address,
-       .ndo_set_rx_mode        = nb8800_set_rx_mode,
-       .ndo_do_ioctl           = phy_do_ioctl,
-       .ndo_validate_addr      = eth_validate_addr,
-};
-
-static void nb8800_get_pauseparam(struct net_device *dev,
-                                 struct ethtool_pauseparam *pp)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-
-       pp->autoneg = priv->pause_aneg;
-       pp->rx_pause = priv->pause_rx;
-       pp->tx_pause = priv->pause_tx;
-}
-
-static int nb8800_set_pauseparam(struct net_device *dev,
-                                struct ethtool_pauseparam *pp)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-       struct phy_device *phydev = dev->phydev;
-
-       priv->pause_aneg = pp->autoneg;
-       priv->pause_rx = pp->rx_pause;
-       priv->pause_tx = pp->tx_pause;
-
-       nb8800_pause_adv(dev);
-
-       if (!priv->pause_aneg)
-               nb8800_pause_config(dev);
-       else if (phydev)
-               phy_start_aneg(phydev);
-
-       return 0;
-}
-
-static const char nb8800_stats_names[][ETH_GSTRING_LEN] = {
-       "rx_bytes_ok",
-       "rx_frames_ok",
-       "rx_undersize_frames",
-       "rx_fragment_frames",
-       "rx_64_byte_frames",
-       "rx_127_byte_frames",
-       "rx_255_byte_frames",
-       "rx_511_byte_frames",
-       "rx_1023_byte_frames",
-       "rx_max_size_frames",
-       "rx_oversize_frames",
-       "rx_bad_fcs_frames",
-       "rx_broadcast_frames",
-       "rx_multicast_frames",
-       "rx_control_frames",
-       "rx_pause_frames",
-       "rx_unsup_control_frames",
-       "rx_align_error_frames",
-       "rx_overrun_frames",
-       "rx_jabber_frames",
-       "rx_bytes",
-       "rx_frames",
-
-       "tx_bytes_ok",
-       "tx_frames_ok",
-       "tx_64_byte_frames",
-       "tx_127_byte_frames",
-       "tx_255_byte_frames",
-       "tx_511_byte_frames",
-       "tx_1023_byte_frames",
-       "tx_max_size_frames",
-       "tx_oversize_frames",
-       "tx_broadcast_frames",
-       "tx_multicast_frames",
-       "tx_control_frames",
-       "tx_pause_frames",
-       "tx_underrun_frames",
-       "tx_single_collision_frames",
-       "tx_multi_collision_frames",
-       "tx_deferred_collision_frames",
-       "tx_late_collision_frames",
-       "tx_excessive_collision_frames",
-       "tx_bytes",
-       "tx_frames",
-       "tx_collisions",
-};
-
-#define NB8800_NUM_STATS ARRAY_SIZE(nb8800_stats_names)
-
-static int nb8800_get_sset_count(struct net_device *dev, int sset)
-{
-       if (sset == ETH_SS_STATS)
-               return NB8800_NUM_STATS;
-
-       return -EOPNOTSUPP;
-}
-
-static void nb8800_get_strings(struct net_device *dev, u32 sset, u8 *buf)
-{
-       if (sset == ETH_SS_STATS)
-               memcpy(buf, &nb8800_stats_names, sizeof(nb8800_stats_names));
-}
-
-static u32 nb8800_read_stat(struct net_device *dev, int index)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-
-       nb8800_writeb(priv, NB8800_STAT_INDEX, index);
-
-       return nb8800_readl(priv, NB8800_STAT_DATA);
-}
-
-static void nb8800_get_ethtool_stats(struct net_device *dev,
-                                    struct ethtool_stats *estats, u64 *st)
-{
-       unsigned int i;
-       u32 rx, tx;
-
-       for (i = 0; i < NB8800_NUM_STATS / 2; i++) {
-               rx = nb8800_read_stat(dev, i);
-               tx = nb8800_read_stat(dev, i | 0x80);
-               st[i] = rx;
-               st[i + NB8800_NUM_STATS / 2] = tx;
-       }
-}
-
-static const struct ethtool_ops nb8800_ethtool_ops = {
-       .nway_reset             = phy_ethtool_nway_reset,
-       .get_link               = ethtool_op_get_link,
-       .get_pauseparam         = nb8800_get_pauseparam,
-       .set_pauseparam         = nb8800_set_pauseparam,
-       .get_sset_count         = nb8800_get_sset_count,
-       .get_strings            = nb8800_get_strings,
-       .get_ethtool_stats      = nb8800_get_ethtool_stats,
-       .get_link_ksettings     = phy_ethtool_get_link_ksettings,
-       .set_link_ksettings     = phy_ethtool_set_link_ksettings,
-};
-
-static int nb8800_hw_init(struct net_device *dev)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-       u32 val;
-
-       val = TX_RETRY_EN | TX_PAD_EN | TX_APPEND_FCS;
-       nb8800_writeb(priv, NB8800_TX_CTL1, val);
-
-       /* Collision retry count */
-       nb8800_writeb(priv, NB8800_TX_CTL2, 5);
-
-       val = RX_PAD_STRIP | RX_AF_EN;
-       nb8800_writeb(priv, NB8800_RX_CTL, val);
-
-       /* Chosen by fair dice roll */
-       nb8800_writeb(priv, NB8800_RANDOM_SEED, 4);
-
-       /* TX cycles per deferral period */
-       nb8800_writeb(priv, NB8800_TX_SDP, 12);
-
-       /* The following three threshold values have been
-        * experimentally determined for good results.
-        */
-
-       /* RX/TX FIFO threshold for partial empty (64-bit entries) */
-       nb8800_writeb(priv, NB8800_PE_THRESHOLD, 0);
-
-       /* RX/TX FIFO threshold for partial full (64-bit entries) */
-       nb8800_writeb(priv, NB8800_PF_THRESHOLD, 255);
-
-       /* Buffer size for transmit (64-bit entries) */
-       nb8800_writeb(priv, NB8800_TX_BUFSIZE, 64);
-
-       /* Configure tx DMA */
-
-       val = nb8800_readl(priv, NB8800_TXC_CR);
-       val &= TCR_LE;          /* keep endian setting */
-       val |= TCR_DM;          /* DMA descriptor mode */
-       val |= TCR_RS;          /* automatically store tx status  */
-       val |= TCR_DIE;         /* interrupt on DMA chain completion */
-       val |= TCR_TFI(7);      /* interrupt after 7 frames transmitted */
-       val |= TCR_BTS(2);      /* 32-byte bus transaction size */
-       nb8800_writel(priv, NB8800_TXC_CR, val);
-
-       /* TX complete interrupt after 10 ms or 7 frames (see above) */
-       val = clk_get_rate(priv->clk) / 100;
-       nb8800_writel(priv, NB8800_TX_ITR, val);
-
-       /* Configure rx DMA */
-
-       val = nb8800_readl(priv, NB8800_RXC_CR);
-       val &= RCR_LE;          /* keep endian setting */
-       val |= RCR_DM;          /* DMA descriptor mode */
-       val |= RCR_RS;          /* automatically store rx status */
-       val |= RCR_DIE;         /* interrupt at end of DMA chain */
-       val |= RCR_RFI(7);      /* interrupt after 7 frames received */
-       val |= RCR_BTS(2);      /* 32-byte bus transaction size */
-       nb8800_writel(priv, NB8800_RXC_CR, val);
-
-       /* The rx interrupt can fire before the DMA has completed
-        * unless a small delay is added.  50 us is hopefully enough.
-        */
-       priv->rx_itr_irq = clk_get_rate(priv->clk) / 20000;
-
-       /* In NAPI poll mode we want to disable interrupts, but the
-        * hardware does not permit this.  Delay 10 ms instead.
-        */
-       priv->rx_itr_poll = clk_get_rate(priv->clk) / 100;
-
-       nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_irq);
-
-       priv->rx_dma_config = RX_BUF_SIZE | DESC_BTS(2) | DESC_DS | DESC_EOF;
-
-       /* Flow control settings */
-
-       /* Pause time of 0.1 ms */
-       val = 100000 / 512;
-       nb8800_writeb(priv, NB8800_PQ1, val >> 8);
-       nb8800_writeb(priv, NB8800_PQ2, val & 0xff);
-
-       /* Auto-negotiate by default */
-       priv->pause_aneg = true;
-       priv->pause_rx = true;
-       priv->pause_tx = true;
-
-       nb8800_mc_init(dev, 0);
-
-       return 0;
-}
-
-static int nb8800_tangox_init(struct net_device *dev)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-       u32 pad_mode = PAD_MODE_MII;
-
-       switch (priv->phy_mode) {
-       case PHY_INTERFACE_MODE_MII:
-       case PHY_INTERFACE_MODE_GMII:
-               pad_mode = PAD_MODE_MII;
-               break;
-
-       case PHY_INTERFACE_MODE_RGMII:
-       case PHY_INTERFACE_MODE_RGMII_ID:
-       case PHY_INTERFACE_MODE_RGMII_RXID:
-       case PHY_INTERFACE_MODE_RGMII_TXID:
-               pad_mode = PAD_MODE_RGMII;
-               break;
-
-       default:
-               dev_err(dev->dev.parent, "unsupported phy mode %s\n",
-                       phy_modes(priv->phy_mode));
-               return -EINVAL;
-       }
-
-       nb8800_writeb(priv, NB8800_TANGOX_PAD_MODE, pad_mode);
-
-       return 0;
-}
-
-static int nb8800_tangox_reset(struct net_device *dev)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-       int clk_div;
-
-       nb8800_writeb(priv, NB8800_TANGOX_RESET, 0);
-       usleep_range(1000, 10000);
-       nb8800_writeb(priv, NB8800_TANGOX_RESET, 1);
-
-       wmb();          /* ensure reset is cleared before proceeding */
-
-       clk_div = DIV_ROUND_UP(clk_get_rate(priv->clk), 2 * MAX_MDC_CLOCK);
-       nb8800_writew(priv, NB8800_TANGOX_MDIO_CLKDIV, clk_div);
-
-       return 0;
-}
-
-static const struct nb8800_ops nb8800_tangox_ops = {
-       .init   = nb8800_tangox_init,
-       .reset  = nb8800_tangox_reset,
-};
-
-static int nb8800_tango4_init(struct net_device *dev)
-{
-       struct nb8800_priv *priv = netdev_priv(dev);
-       int err;
-
-       err = nb8800_tangox_init(dev);
-       if (err)
-               return err;
-
-       /* On tango4 interrupt on DMA completion per frame works and gives
-        * better performance despite generating more rx interrupts.
-        */
-
-       /* Disable unnecessary interrupt on rx completion */
-       nb8800_clearl(priv, NB8800_RXC_CR, RCR_RFI(7));
-
-       /* Request interrupt on descriptor DMA completion */
-       priv->rx_dma_config |= DESC_ID;
-
-       return 0;
-}
-
-static const struct nb8800_ops nb8800_tango4_ops = {
-       .init   = nb8800_tango4_init,
-       .reset  = nb8800_tangox_reset,
-};
-
-static const struct of_device_id nb8800_dt_ids[] = {
-       {
-               .compatible = "aurora,nb8800",
-       },
-       {
-               .compatible = "sigma,smp8642-ethernet",
-               .data = &nb8800_tangox_ops,
-       },
-       {
-               .compatible = "sigma,smp8734-ethernet",
-               .data = &nb8800_tango4_ops,
-       },
-       { }
-};
-MODULE_DEVICE_TABLE(of, nb8800_dt_ids);
-
-static int nb8800_probe(struct platform_device *pdev)
-{
-       const struct of_device_id *match;
-       const struct nb8800_ops *ops = NULL;
-       struct nb8800_priv *priv;
-       struct resource *res;
-       struct net_device *dev;
-       struct mii_bus *bus;
-       const unsigned char *mac;
-       void __iomem *base;
-       int irq;
-       int ret;
-
-       match = of_match_device(nb8800_dt_ids, &pdev->dev);
-       if (match)
-               ops = match->data;
-
-       irq = platform_get_irq(pdev, 0);
-       if (irq <= 0)
-               return -EINVAL;
-
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       base = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(base))
-               return PTR_ERR(base);
-
-       dev_dbg(&pdev->dev, "AU-NB8800 Ethernet at %pa\n", &res->start);
-
-       dev = alloc_etherdev(sizeof(*priv));
-       if (!dev)
-               return -ENOMEM;
-
-       platform_set_drvdata(pdev, dev);
-       SET_NETDEV_DEV(dev, &pdev->dev);
-
-       priv = netdev_priv(dev);
-       priv->base = base;
-
-       ret = of_get_phy_mode(pdev->dev.of_node, &priv->phy_mode);
-       if (ret)
-               priv->phy_mode = PHY_INTERFACE_MODE_RGMII;
-
-       priv->clk = devm_clk_get(&pdev->dev, NULL);
-       if (IS_ERR(priv->clk)) {
-               dev_err(&pdev->dev, "failed to get clock\n");
-               ret = PTR_ERR(priv->clk);
-               goto err_free_dev;
-       }
-
-       ret = clk_prepare_enable(priv->clk);
-       if (ret)
-               goto err_free_dev;
-
-       spin_lock_init(&priv->tx_lock);
-
-       if (ops && ops->reset) {
-               ret = ops->reset(dev);
-               if (ret)
-                       goto err_disable_clk;
-       }
-
-       bus = devm_mdiobus_alloc(&pdev->dev);
-       if (!bus) {
-               ret = -ENOMEM;
-               goto err_disable_clk;
-       }
-
-       bus->name = "nb8800-mii";
-       bus->read = nb8800_mdio_read;
-       bus->write = nb8800_mdio_write;
-       bus->parent = &pdev->dev;
-       snprintf(bus->id, MII_BUS_ID_SIZE, "%lx.nb8800-mii",
-                (unsigned long)res->start);
-       bus->priv = priv;
-
-       ret = of_mdiobus_register(bus, pdev->dev.of_node);
-       if (ret) {
-               dev_err(&pdev->dev, "failed to register MII bus\n");
-               goto err_disable_clk;
-       }
-
-       if (of_phy_is_fixed_link(pdev->dev.of_node)) {
-               ret = of_phy_register_fixed_link(pdev->dev.of_node);
-               if (ret < 0) {
-                       dev_err(&pdev->dev, "bad fixed-link spec\n");
-                       goto err_free_bus;
-               }
-               priv->phy_node = of_node_get(pdev->dev.of_node);
-       }
-
-       if (!priv->phy_node)
-               priv->phy_node = of_parse_phandle(pdev->dev.of_node,
-                                                 "phy-handle", 0);
-
-       if (!priv->phy_node) {
-               dev_err(&pdev->dev, "no PHY specified\n");
-               ret = -ENODEV;
-               goto err_free_bus;
-       }
-
-       priv->mii_bus = bus;
-
-       ret = nb8800_hw_init(dev);
-       if (ret)
-               goto err_deregister_fixed_link;
-
-       if (ops && ops->init) {
-               ret = ops->init(dev);
-               if (ret)
-                       goto err_deregister_fixed_link;
-       }
-
-       dev->netdev_ops = &nb8800_netdev_ops;
-       dev->ethtool_ops = &nb8800_ethtool_ops;
-       dev->flags |= IFF_MULTICAST;
-       dev->irq = irq;
-
-       mac = of_get_mac_address(pdev->dev.of_node);
-       if (!IS_ERR(mac))
-               ether_addr_copy(dev->dev_addr, mac);
-
-       if (!is_valid_ether_addr(dev->dev_addr))
-               eth_hw_addr_random(dev);
-
-       nb8800_update_mac_addr(dev);
-
-       netif_carrier_off(dev);
-
-       ret = register_netdev(dev);
-       if (ret) {
-               netdev_err(dev, "failed to register netdev\n");
-               goto err_free_dma;
-       }
-
-       netif_napi_add(dev, &priv->napi, nb8800_poll, NAPI_POLL_WEIGHT);
-
-       netdev_info(dev, "MAC address %pM\n", dev->dev_addr);
-
-       return 0;
-
-err_free_dma:
-       nb8800_dma_free(dev);
-err_deregister_fixed_link:
-       if (of_phy_is_fixed_link(pdev->dev.of_node))
-               of_phy_deregister_fixed_link(pdev->dev.of_node);
-err_free_bus:
-       of_node_put(priv->phy_node);
-       mdiobus_unregister(bus);
-err_disable_clk:
-       clk_disable_unprepare(priv->clk);
-err_free_dev:
-       free_netdev(dev);
-
-       return ret;
-}
-
-static int nb8800_remove(struct platform_device *pdev)
-{
-       struct net_device *ndev = platform_get_drvdata(pdev);
-       struct nb8800_priv *priv = netdev_priv(ndev);
-
-       unregister_netdev(ndev);
-       if (of_phy_is_fixed_link(pdev->dev.of_node))
-               of_phy_deregister_fixed_link(pdev->dev.of_node);
-       of_node_put(priv->phy_node);
-
-       mdiobus_unregister(priv->mii_bus);
-
-       clk_disable_unprepare(priv->clk);
-
-       nb8800_dma_free(ndev);
-       free_netdev(ndev);
-
-       return 0;
-}
-
-static struct platform_driver nb8800_driver = {
-       .driver = {
-               .name           = "nb8800",
-               .of_match_table = nb8800_dt_ids,
-       },
-       .probe  = nb8800_probe,
-       .remove = nb8800_remove,
-};
-
-module_platform_driver(nb8800_driver);
-
-MODULE_DESCRIPTION("Aurora AU-NB8800 Ethernet driver");
-MODULE_AUTHOR("Mans Rullgard <mans@mansr.com>");
-MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/aurora/nb8800.h b/drivers/net/ethernet/aurora/nb8800.h
deleted file mode 100644
index 40941fb..0000000
+++ /dev/null
@@ -1,316 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NB8800_H_
-#define _NB8800_H_
-
-#include <linux/types.h>
-#include <linux/skbuff.h>
-#include <linux/phy.h>
-#include <linux/clk.h>
-#include <linux/bitops.h>
-
-#define RX_DESC_COUNT                  256
-#define TX_DESC_COUNT                  256
-
-#define NB8800_DESC_LOW                        4
-
-#define RX_BUF_SIZE                    1552
-
-#define RX_COPYBREAK                   256
-#define RX_COPYHDR                     128
-
-#define MAX_MDC_CLOCK                  2500000
-
-/* Stargate Solutions SSN8800 core registers */
-#define NB8800_TX_CTL1                 0x000
-#define TX_TPD                         BIT(5)
-#define TX_APPEND_FCS                  BIT(4)
-#define TX_PAD_EN                      BIT(3)
-#define TX_RETRY_EN                    BIT(2)
-#define TX_EN                          BIT(0)
-
-#define NB8800_TX_CTL2                 0x001
-
-#define NB8800_RX_CTL                  0x004
-#define RX_BC_DISABLE                  BIT(7)
-#define RX_RUNT                                BIT(6)
-#define RX_AF_EN                       BIT(5)
-#define RX_PAUSE_EN                    BIT(3)
-#define RX_SEND_CRC                    BIT(2)
-#define RX_PAD_STRIP                   BIT(1)
-#define RX_EN                          BIT(0)
-
-#define NB8800_RANDOM_SEED             0x008
-#define NB8800_TX_SDP                  0x14
-#define NB8800_TX_TPDP1                        0x18
-#define NB8800_TX_TPDP2                        0x19
-#define NB8800_SLOT_TIME               0x1c
-
-#define NB8800_MDIO_CMD                        0x020
-#define MDIO_CMD_GO                    BIT(31)
-#define MDIO_CMD_WR                    BIT(26)
-#define MDIO_CMD_ADDR(x)               ((x) << 21)
-#define MDIO_CMD_REG(x)                        ((x) << 16)
-#define MDIO_CMD_DATA(x)               ((x) <<  0)
-
-#define NB8800_MDIO_STS                        0x024
-#define MDIO_STS_ERR                   BIT(31)
-
-#define NB8800_MC_ADDR(i)              (0x028 + (i))
-#define NB8800_MC_INIT                 0x02e
-#define NB8800_UC_ADDR(i)              (0x03c + (i))
-
-#define NB8800_MAC_MODE                        0x044
-#define RGMII_MODE                     BIT(7)
-#define HALF_DUPLEX                    BIT(4)
-#define BURST_EN                       BIT(3)
-#define LOOPBACK_EN                    BIT(2)
-#define GMAC_MODE                      BIT(0)
-
-#define NB8800_IC_THRESHOLD            0x050
-#define NB8800_PE_THRESHOLD            0x051
-#define NB8800_PF_THRESHOLD            0x052
-#define NB8800_TX_BUFSIZE              0x054
-#define NB8800_FIFO_CTL                        0x056
-#define NB8800_PQ1                     0x060
-#define NB8800_PQ2                     0x061
-#define NB8800_SRC_ADDR(i)             (0x06a + (i))
-#define NB8800_STAT_DATA               0x078
-#define NB8800_STAT_INDEX              0x07c
-#define NB8800_STAT_CLEAR              0x07d
-
-#define NB8800_SLEEP_MODE              0x07e
-#define SLEEP_MODE                     BIT(0)
-
-#define NB8800_WAKEUP                  0x07f
-#define WAKEUP                         BIT(0)
-
-/* Aurora NB8800 host interface registers */
-#define NB8800_TXC_CR                  0x100
-#define TCR_LK                         BIT(12)
-#define TCR_DS                         BIT(11)
-#define TCR_BTS(x)                     (((x) & 0x7) << 8)
-#define TCR_DIE                                BIT(7)
-#define TCR_TFI(x)                     (((x) & 0x7) << 4)
-#define TCR_LE                         BIT(3)
-#define TCR_RS                         BIT(2)
-#define TCR_DM                         BIT(1)
-#define TCR_EN                         BIT(0)
-
-#define NB8800_TXC_SR                  0x104
-#define TSR_DE                         BIT(3)
-#define TSR_DI                         BIT(2)
-#define TSR_TO                         BIT(1)
-#define TSR_TI                         BIT(0)
-
-#define NB8800_TX_SAR                  0x108
-#define NB8800_TX_DESC_ADDR            0x10c
-
-#define NB8800_TX_REPORT_ADDR          0x110
-#define TX_BYTES_TRANSFERRED(x)                (((x) >> 16) & 0xffff)
-#define TX_FIRST_DEFERRAL              BIT(7)
-#define TX_EARLY_COLLISIONS(x)         (((x) >> 3) & 0xf)
-#define TX_LATE_COLLISION              BIT(2)
-#define TX_PACKET_DROPPED              BIT(1)
-#define TX_FIFO_UNDERRUN               BIT(0)
-#define IS_TX_ERROR(r)                 ((r) & 0x07)
-
-#define NB8800_TX_FIFO_SR              0x114
-#define NB8800_TX_ITR                  0x118
-
-#define NB8800_RXC_CR                  0x200
-#define RCR_FL                         BIT(13)
-#define RCR_LK                         BIT(12)
-#define RCR_DS                         BIT(11)
-#define RCR_BTS(x)                     (((x) & 7) << 8)
-#define RCR_DIE                                BIT(7)
-#define RCR_RFI(x)                     (((x) & 7) << 4)
-#define RCR_LE                         BIT(3)
-#define RCR_RS                         BIT(2)
-#define RCR_DM                         BIT(1)
-#define RCR_EN                         BIT(0)
-
-#define NB8800_RXC_SR                  0x204
-#define RSR_DE                         BIT(3)
-#define RSR_DI                         BIT(2)
-#define RSR_RO                         BIT(1)
-#define RSR_RI                         BIT(0)
-
-#define NB8800_RX_SAR                  0x208
-#define NB8800_RX_DESC_ADDR            0x20c
-
-#define NB8800_RX_REPORT_ADDR          0x210
-#define RX_BYTES_TRANSFERRED(x)                (((x) >> 16) & 0xFFFF)
-#define RX_MULTICAST_PKT               BIT(9)
-#define RX_BROADCAST_PKT               BIT(8)
-#define RX_LENGTH_ERR                  BIT(7)
-#define RX_FCS_ERR                     BIT(6)
-#define RX_RUNT_PKT                    BIT(5)
-#define RX_FIFO_OVERRUN                        BIT(4)
-#define RX_LATE_COLLISION              BIT(3)
-#define RX_ALIGNMENT_ERROR             BIT(2)
-#define RX_ERROR_MASK                  0xfc
-#define IS_RX_ERROR(r)                 ((r) & RX_ERROR_MASK)
-
-#define NB8800_RX_FIFO_SR              0x214
-#define NB8800_RX_ITR                  0x218
-
-/* Sigma Designs SMP86xx additional registers */
-#define NB8800_TANGOX_PAD_MODE         0x400
-#define PAD_MODE_MASK                  0x7
-#define PAD_MODE_MII                   0x0
-#define PAD_MODE_RGMII                 0x1
-#define PAD_MODE_GTX_CLK_INV           BIT(3)
-#define PAD_MODE_GTX_CLK_DELAY         BIT(4)
-
-#define NB8800_TANGOX_MDIO_CLKDIV      0x420
-#define NB8800_TANGOX_RESET            0x424
-
-/* Hardware DMA descriptor */
-struct nb8800_dma_desc {
-       u32                             s_addr; /* start address */
-       u32                             n_addr; /* next descriptor address */
-       u32                             r_addr; /* report address */
-       u32                             config;
-} __aligned(8);
-
-#define DESC_ID                                BIT(23)
-#define DESC_EOC                       BIT(22)
-#define DESC_EOF                       BIT(21)
-#define DESC_LK                                BIT(20)
-#define DESC_DS                                BIT(19)
-#define DESC_BTS(x)                    (((x) & 0x7) << 16)
-
-/* DMA descriptor and associated data for rx.
- * Allocated from coherent memory.
- */
-struct nb8800_rx_desc {
-       /* DMA descriptor */
-       struct nb8800_dma_desc          desc;
-
-       /* Status report filled in by hardware */
-       u32                             report;
-};
-
-/* Address of buffer on rx ring */
-struct nb8800_rx_buf {
-       struct page                     *page;
-       unsigned long                   offset;
-};
-
-/* DMA descriptors and associated data for tx.
- * Allocated from coherent memory.
- */
-struct nb8800_tx_desc {
-       /* DMA descriptor.  The second descriptor is used if packet
-        * data is unaligned.
-        */
-       struct nb8800_dma_desc          desc[2];
-
-       /* Status report filled in by hardware */
-       u32                             report;
-
-       /* Bounce buffer for initial unaligned part of packet */
-       u8                              buf[8] __aligned(8);
-};
-
-/* Packet in tx queue */
-struct nb8800_tx_buf {
-       /* Currently queued skb */
-       struct sk_buff                  *skb;
-
-       /* DMA address of the first descriptor */
-       dma_addr_t                      dma_desc;
-
-       /* DMA address of packet data */
-       dma_addr_t                      dma_addr;
-
-       /* Length of DMA mapping, less than skb->len if alignment
-        * buffer is used.
-        */
-       unsigned int                    dma_len;
-
-       /* Number of packets in chain starting here */
-       unsigned int                    chain_len;
-
-       /* Packet chain ready to be submitted to hardware */
-       bool                            ready;
-};
-
-struct nb8800_priv {
-       struct napi_struct              napi;
-
-       void __iomem                    *base;
-
-       /* RX DMA descriptors */
-       struct nb8800_rx_desc           *rx_descs;
-
-       /* RX buffers referenced by DMA descriptors */
-       struct nb8800_rx_buf            *rx_bufs;
-
-       /* Current end of chain */
-       u32                             rx_eoc;
-
-       /* Value for rx interrupt time register in NAPI interrupt mode */
-       u32                             rx_itr_irq;
-
-       /* Value for rx interrupt time register in NAPI poll mode */
-       u32                             rx_itr_poll;
-
-       /* Value for config field of rx DMA descriptors */
-       u32                             rx_dma_config;
-
-       /* TX DMA descriptors */
-       struct nb8800_tx_desc           *tx_descs;
-
-       /* TX packet queue */
-       struct nb8800_tx_buf            *tx_bufs;
-
-       /* Number of free tx queue entries */
-       atomic_t                        tx_free;
-
-       /* First free tx queue entry */
-       u32                             tx_next;
-
-       /* Next buffer to transmit */
-       u32                             tx_queue;
-
-       /* Start of current packet chain */
-       struct nb8800_tx_buf            *tx_chain;
-
-       /* Next buffer to reclaim */
-       u32                             tx_done;
-
-       /* Lock for DMA activation */
-       spinlock_t                      tx_lock;
-
-       struct mii_bus                  *mii_bus;
-       struct device_node              *phy_node;
-
-       /* PHY connection type from DT */
-       phy_interface_t                 phy_mode;
-
-       /* Current link status */
-       int                             speed;
-       int                             duplex;
-       int                             link;
-
-       /* Pause settings */
-       bool                            pause_aneg;
-       bool                            pause_rx;
-       bool                            pause_tx;
-
-       /* DMA base address of rx descriptors, see rx_descs above */
-       dma_addr_t                      rx_desc_dma;
-
-       /* DMA base address of tx descriptors, see tx_descs above */
-       dma_addr_t                      tx_desc_dma;
-
-       struct clk                      *clk;
-};
-
-struct nb8800_ops {
-       int                             (*init)(struct net_device *dev);
-       int                             (*reset)(struct net_device *dev);
-};
-
-#endif /* _NB8800_H_ */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index d31a5ad..f508c5c 100644
@@ -255,6 +255,7 @@ static const u16 bnxt_async_events_arr[] = {
        ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE,
        ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY,
        ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY,
+       ASYNC_EVENT_CMPL_EVENT_ID_DEBUG_NOTIFICATION,
        ASYNC_EVENT_CMPL_EVENT_ID_RING_MONITOR_MSG,
 };
 
@@ -1265,8 +1266,7 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
        } else {
                tpa_info->hash_type = PKT_HASH_TYPE_NONE;
                tpa_info->gso_type = 0;
-               if (netif_msg_rx_err(bp))
-                       netdev_warn(bp->dev, "TPA packet without valid hash\n");
+               netif_warn(bp, rx_err, bp->dev, "TPA packet without valid hash\n");
        }
        tpa_info->flags2 = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_flags2);
        tpa_info->metadata = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_metadata);
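
These hunks (and several below) convert open-coded "if (netif_msg_rx_err(bp)) netdev_warn(...)" sequences to the netif_warn()/netif_info()/netif_notice() helpers, which fold the msg_enable check into the call itself. A minimal userspace sketch of that shape follows; the fake_netif_warn() macro only mimics the kernel helper from <linux/netdevice.h> and is not its real definition.

#include <stdio.h>

#define NETIF_MSG_RX_ERR 0x1u

struct fake_priv { unsigned int msg_enable; };

/* Shape of the helper only: test the msg_enable bit, then print. */
#define fake_netif_warn(priv, flag, fmt, ...)                          \
        do {                                                           \
                if ((priv)->msg_enable & (flag))                       \
                        fprintf(stderr, fmt, ##__VA_ARGS__);           \
        } while (0)

int main(void)
{
        struct fake_priv bp = { .msg_enable = NETIF_MSG_RX_ERR };

        /* Prints because rx_err logging is enabled in msg_enable. */
        fake_netif_warn(&bp, NETIF_MSG_RX_ERR, "TPA packet without valid hash\n");
        return 0;
}
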
@@ -2021,10 +2021,9 @@ static int bnxt_async_event_process(struct bnxt *bp,
                        goto async_event_process_exit;
                set_bit(BNXT_RESET_TASK_SILENT_SP_EVENT, &bp->sp_event);
                break;
-       case ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY:
-               if (netif_msg_hw(bp))
-                       netdev_warn(bp->dev, "Received RESET_NOTIFY event, data1: 0x%x, data2: 0x%x\n",
-                                   data1, data2);
+       case ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY: {
+               char *fatal_str = "non-fatal";
+
                if (!bp->fw_health)
                        goto async_event_process_exit;
 
@@ -2036,14 +2035,17 @@ static int bnxt_async_event_process(struct bnxt *bp,
                if (!bp->fw_reset_max_dsecs)
                        bp->fw_reset_max_dsecs = BNXT_DFLT_FW_RST_MAX_DSECS;
                if (EVENT_DATA1_RESET_NOTIFY_FATAL(data1)) {
-                       netdev_warn(bp->dev, "Firmware fatal reset event received\n");
+                       fatal_str = "fatal";
                        set_bit(BNXT_STATE_FW_FATAL_COND, &bp->state);
-               } else {
-                       netdev_warn(bp->dev, "Firmware non-fatal reset event received, max wait time %d msec\n",
-                                   bp->fw_reset_max_dsecs * 100);
                }
+               netif_warn(bp, hw, bp->dev,
+                          "Firmware %s reset event, data1: 0x%x, data2: 0x%x, min wait %u ms, max wait %u ms\n",
+                          fatal_str, data1, data2,
+                          bp->fw_reset_min_dsecs * 100,
+                          bp->fw_reset_max_dsecs * 100);
                set_bit(BNXT_FW_RESET_NOTIFY_SP_EVENT, &bp->sp_event);
                break;
+       }
        case ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY: {
                struct bnxt_fw_health *fw_health = bp->fw_health;
 
@@ -2055,13 +2057,11 @@ static int bnxt_async_event_process(struct bnxt *bp,
                if (!fw_health->enabled)
                        break;
 
-               if (netif_msg_drv(bp))
-                       netdev_info(bp->dev, "Error recovery info: error recovery[%d], master[%d], reset count[0x%x], health status: 0x%x\n",
-                                   fw_health->enabled, fw_health->master,
-                                   bnxt_fw_health_readl(bp,
-                                                        BNXT_FW_RESET_CNT_REG),
-                                   bnxt_fw_health_readl(bp,
-                                                        BNXT_FW_HEALTH_REG));
+               netif_info(bp, drv, bp->dev,
+                          "Error recovery info: error recovery[%d], master[%d], reset count[0x%x], health status: 0x%x\n",
+                          fw_health->enabled, fw_health->master,
+                          bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG),
+                          bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG));
                fw_health->tmr_multiplier =
                        DIV_ROUND_UP(fw_health->polling_dsecs * HZ,
                                     bp->current_interval * 10);
@@ -2072,6 +2072,11 @@ static int bnxt_async_event_process(struct bnxt *bp,
                        bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG);
                goto async_event_process_exit;
        }
+       case ASYNC_EVENT_CMPL_EVENT_ID_DEBUG_NOTIFICATION:
+               netif_notice(bp, hw, bp->dev,
+                            "Received firmware debug notification, data1: 0x%x, data2: 0x%x\n",
+                            data1, data2);
+               goto async_event_process_exit;
        case ASYNC_EVENT_CMPL_EVENT_ID_RING_MONITOR_MSG: {
                struct bnxt_rx_ring_info *rxr;
                u16 grp_idx;
@@ -2394,6 +2399,10 @@ static int bnxt_poll(struct napi_struct *napi, int budget)
        struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
        int work_done = 0;
 
+       if (unlikely(test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))) {
+               napi_complete(napi);
+               return 0;
+       }
        while (1) {
                work_done += bnxt_poll_work(bp, cpr, budget - work_done);
 
@@ -2468,6 +2477,10 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget)
        int work_done = 0;
        u32 cons;
 
+       if (unlikely(test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))) {
+               napi_complete(napi);
+               return 0;
+       }
        if (cpr->has_more_work) {
                cpr->has_more_work = 0;
                work_done = __bnxt_poll_cqs(bp, bnapi, budget);
@@ -4272,6 +4285,9 @@ static void bnxt_disable_int_sync(struct bnxt *bp)
 {
        int i;
 
+       if (!bp->irq_tbl)
+               return;
+
        atomic_inc(&bp->intr_sem);
 
        bnxt_disable_int(bp);
@@ -4425,6 +4441,8 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 
        if (!timeout)
                timeout = DFLT_HWRM_CMD_TIMEOUT;
+       /* Limit timeout to an upper limit */
+       timeout = min(timeout, HWRM_CMD_MAX_TIMEOUT);
        /* convert timeout to usec */
        timeout *= 1000;
 
@@ -6845,6 +6863,7 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables)
        struct hwrm_func_backing_store_cfg_input req = {0};
        struct bnxt_ctx_mem_info *ctx = bp->ctx;
        struct bnxt_ctx_pg_info *ctx_pg;
+       u32 req_len = sizeof(req);
        __le32 *num_entries;
        __le64 *pg_dir;
        u32 flags = 0;
@@ -6855,6 +6874,8 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables)
        if (!ctx)
                return 0;
 
+       if (req_len > bp->hwrm_max_ext_req_len)
+               req_len = BNXT_BACKING_STORE_CFG_LEGACY_LEN;
        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_BACKING_STORE_CFG, -1, -1);
        req.enables = cpu_to_le32(enables);
 
@@ -6938,7 +6959,7 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables)
                bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, pg_attr, pg_dir);
        }
        req.flags = cpu_to_le32(flags);
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       return hwrm_send_message(bp, &req, req_len, HWRM_CMD_TIMEOUT);
 }
 
 static int bnxt_alloc_ctx_mem_blk(struct bnxt *bp,
@@ -7438,9 +7459,22 @@ static void bnxt_try_map_fw_health_reg(struct bnxt *bp)
 
        sig = readl(hs + offsetof(struct hcomm_status, sig_ver));
        if ((sig & HCOMM_STATUS_SIGNATURE_MASK) != HCOMM_STATUS_SIGNATURE_VAL) {
-               if (bp->fw_health)
-                       bp->fw_health->status_reliable = false;
-               return;
+               if (!bp->chip_num) {
+                       __bnxt_map_fw_health_reg(bp, BNXT_GRC_REG_BASE);
+                       bp->chip_num = readl(bp->bar0 +
+                                            BNXT_FW_HEALTH_WIN_BASE +
+                                            BNXT_GRC_REG_CHIP_NUM);
+               }
+               if (!BNXT_CHIP_P5(bp)) {
+                       if (bp->fw_health)
+                               bp->fw_health->status_reliable = false;
+                       return;
+               }
+               status_loc = BNXT_GRC_REG_STATUS_P5 |
+                            BNXT_FW_HEALTH_REG_TYPE_BAR0;
+       } else {
+               status_loc = readl(hs + offsetof(struct hcomm_status,
+                                                fw_status_loc));
        }
 
        if (__bnxt_alloc_fw_health(bp)) {
@@ -7448,7 +7482,6 @@ static void bnxt_try_map_fw_health_reg(struct bnxt *bp)
                return;
        }
 
-       status_loc = readl(hs + offsetof(struct hcomm_status, fw_status_loc));
        bp->fw_health->regs[BNXT_FW_HEALTH_REG] = status_loc;
        reg_type = BNXT_FW_HEALTH_REG_TYPE(status_loc);
        if (reg_type == BNXT_FW_HEALTH_REG_TYPE_GRC) {
@@ -8811,7 +8844,8 @@ static void bnxt_disable_napi(struct bnxt *bp)
 {
        int i;
 
-       if (!bp->bnapi)
+       if (!bp->bnapi ||
+           test_and_set_bit(BNXT_STATE_NAPI_DISABLED, &bp->state))
                return;
 
        for (i = 0; i < bp->cp_nr_rings; i++) {
@@ -8828,6 +8862,7 @@ static void bnxt_enable_napi(struct bnxt *bp)
 {
        int i;
 
+       clear_bit(BNXT_STATE_NAPI_DISABLED, &bp->state);
        for (i = 0; i < bp->cp_nr_rings; i++) {
                struct bnxt_napi *bnapi = bp->bnapi[i];
                struct bnxt_cp_ring_info *cpr;
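
bnxt_disable_napi() now uses test_and_set_bit(BNXT_STATE_NAPI_DISABLED, ...) so a second disable is a no-op, and bnxt_enable_napi() clears the bit again; that lets the fatal-error path added to bnxt_fw_reset_close() further down quiesce NAPI early without tripping over the normal close path. A userspace sketch of the same idempotent disable/enable shape, using C11 atomics purely for illustration:

#include <stdatomic.h>
#include <stdio.h>

static atomic_flag napi_disabled = ATOMIC_FLAG_INIT;

/* Idempotent disable: only the first caller does the work, like the
 * test_and_set_bit(BNXT_STATE_NAPI_DISABLED, ...) check above. */
static void disable_napi(void)
{
        if (atomic_flag_test_and_set(&napi_disabled))
                return;                 /* already disabled */
        puts("disabling NAPI");
}

static void enable_napi(void)
{
        atomic_flag_clear(&napi_disabled);
        puts("enabling NAPI");
}

int main(void)
{
        disable_napi();
        disable_napi();                 /* no-op the second time */
        enable_napi();
        return 0;
}
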
@@ -9334,13 +9369,60 @@ static int bnxt_hwrm_shutdown_link(struct bnxt *bp)
 
 static int bnxt_fw_init_one(struct bnxt *bp);
 
+static int bnxt_fw_reset_via_optee(struct bnxt *bp)
+{
+#ifdef CONFIG_TEE_BNXT_FW
+       int rc = tee_bnxt_fw_load();
+
+       if (rc)
+               netdev_err(bp->dev, "Failed FW reset via OP-TEE, rc=%d\n", rc);
+
+       return rc;
+#else
+       netdev_err(bp->dev, "OP-TEE not supported\n");
+       return -ENODEV;
+#endif
+}
+
+static int bnxt_try_recover_fw(struct bnxt *bp)
+{
+       if (bp->fw_health && bp->fw_health->status_reliable) {
+               int retry = 0, rc;
+               u32 sts;
+
+               mutex_lock(&bp->hwrm_cmd_lock);
+               do {
+                       rc = __bnxt_hwrm_ver_get(bp, true);
+                       sts = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
+                       if (!sts || !BNXT_FW_IS_BOOTING(sts))
+                               break;
+                       retry++;
+               } while (rc == -EBUSY && retry < BNXT_FW_RETRY);
+               mutex_unlock(&bp->hwrm_cmd_lock);
+
+               if (!BNXT_FW_IS_HEALTHY(sts)) {
+                       netdev_err(bp->dev,
+                                  "Firmware not responding, status: 0x%x\n",
+                                  sts);
+                       rc = -ENODEV;
+               }
+               if (sts & FW_STATUS_REG_CRASHED_NO_MASTER) {
+                       netdev_warn(bp->dev, "Firmware recover via OP-TEE requested\n");
+                       return bnxt_fw_reset_via_optee(bp);
+               }
+               return rc;
+       }
+
+       return -ENODEV;
+}
+
 static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
 {
        struct hwrm_func_drv_if_change_output *resp = bp->hwrm_cmd_resp_addr;
        struct hwrm_func_drv_if_change_input req = {0};
        bool resc_reinit = false, fw_reset = false;
+       int rc, retry = 0;
        u32 flags = 0;
-       int rc;
 
        if (!(bp->fw_cap & BNXT_FW_CAP_IF_CHANGE))
                return 0;
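
The following hunk makes bnxt_hwrm_if_change() retry the FUNC_DRV_IF_CHANGE request while firmware returns -EAGAIN, sleeping 50 ms between attempts and giving up after BNXT_FW_IF_RETRY tries (defined in the bnxt.h hunk further down). A standalone sketch of that bounded-retry shape; send_once() is a made-up stand-in for _hwrm_send_message():

#include <errno.h>
#include <stdio.h>
#include <unistd.h>

#define FW_IF_RETRY 10          /* mirrors BNXT_FW_IF_RETRY */

/* Stand-in for _hwrm_send_message(): pretend firmware is busy twice. */
static int send_once(void)
{
        static int busy = 2;

        return busy-- > 0 ? -EAGAIN : 0;
}

int main(void)
{
        int rc, retry = 0;

        while (retry < FW_IF_RETRY) {
                rc = send_once();
                if (rc != -EAGAIN)
                        break;
                usleep(50 * 1000);      /* 50 ms between attempts */
                retry++;
        }
        printf("rc=%d after %d retries\n", rc, retry);
        return 0;
}
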
@@ -9349,10 +9431,25 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
        if (up)
                req.flags = cpu_to_le32(FUNC_DRV_IF_CHANGE_REQ_FLAGS_UP);
        mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       while (retry < BNXT_FW_IF_RETRY) {
+               rc = _hwrm_send_message(bp, &req, sizeof(req),
+                                       HWRM_CMD_TIMEOUT);
+               if (rc != -EAGAIN)
+                       break;
+
+               msleep(50);
+               retry++;
+       }
        if (!rc)
                flags = le32_to_cpu(resp->flags);
        mutex_unlock(&bp->hwrm_cmd_lock);
+
+       if (rc == -EAGAIN)
+               return rc;
+       if (rc && up) {
+               rc = bnxt_try_recover_fw(bp);
+               fw_reset = true;
+       }
        if (rc)
                return rc;
 
@@ -9692,6 +9789,25 @@ static void bnxt_preset_reg_win(struct bnxt *bp)
 
 static int bnxt_init_dflt_ring_mode(struct bnxt *bp);
 
+static int bnxt_reinit_after_abort(struct bnxt *bp)
+{
+       int rc;
+
+       if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
+               return -EBUSY;
+
+       rc = bnxt_fw_init_one(bp);
+       if (!rc) {
+               bnxt_clear_int_mode(bp);
+               rc = bnxt_init_int_mode(bp);
+               if (!rc) {
+                       clear_bit(BNXT_STATE_ABORT_ERR, &bp->state);
+                       set_bit(BNXT_STATE_FW_RESET_DET, &bp->state);
+               }
+       }
+       return rc;
+}
+
 static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
 {
        int rc = 0;
@@ -9850,8 +9966,14 @@ static int bnxt_open(struct net_device *dev)
        int rc;
 
        if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) {
-               netdev_err(bp->dev, "A previous firmware reset did not complete, aborting\n");
-               return -ENODEV;
+               rc = bnxt_reinit_after_abort(bp);
+               if (rc) {
+                       if (rc == -EBUSY)
+                               netdev_err(bp->dev, "A previous firmware reset has not completed, aborting\n");
+                       else
+                               netdev_err(bp->dev, "Failed to reinitialize after aborted firmware reset\n");
+                       return -ENODEV;
+               }
        }
 
        rc = bnxt_hwrm_if_change(bp, true);
@@ -10788,11 +10910,18 @@ static void bnxt_rx_ring_reset(struct bnxt *bp)
 static void bnxt_fw_reset_close(struct bnxt *bp)
 {
        bnxt_ulp_stop(bp);
-       /* When firmware is fatal state, disable PCI device to prevent
-        * any potential bad DMAs before freeing kernel memory.
+       /* When firmware is in fatal state, quiesce device and disable
+        * bus master to prevent any potential bad DMAs before freeing
+        * kernel memory.
         */
-       if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
+       if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) {
+               bnxt_tx_disable(bp);
+               bnxt_disable_napi(bp);
+               bnxt_disable_int_sync(bp);
+               bnxt_free_irq(bp);
+               bnxt_clear_int_mode(bp);
                pci_disable_device(bp->pdev);
+       }
        __bnxt_close_nic(bp, true, false);
        bnxt_clear_int_mode(bp);
        bnxt_hwrm_func_drv_unrgtr(bp);
@@ -11180,21 +11309,6 @@ static void bnxt_init_dflt_coal(struct bnxt *bp)
        bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS;
 }
 
-static int bnxt_fw_reset_via_optee(struct bnxt *bp)
-{
-#ifdef CONFIG_TEE_BNXT_FW
-       int rc = tee_bnxt_fw_load();
-
-       if (rc)
-               netdev_err(bp->dev, "Failed FW reset via OP-TEE, rc=%d\n", rc);
-
-       return rc;
-#else
-       netdev_err(bp->dev, "OP-TEE not supported\n");
-       return -ENODEV;
-#endif
-}
-
 static int bnxt_fw_init_one_p1(struct bnxt *bp)
 {
        int rc;
@@ -11203,19 +11317,10 @@ static int bnxt_fw_init_one_p1(struct bnxt *bp)
        rc = bnxt_hwrm_ver_get(bp);
        bnxt_try_map_fw_health_reg(bp);
        if (rc) {
-               if (bp->fw_health && bp->fw_health->status_reliable) {
-                       u32 sts = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
-
-                       netdev_err(bp->dev,
-                                  "Firmware not responding, status: 0x%x\n",
-                                  sts);
-                       if (sts & FW_STATUS_REG_CRASHED_NO_MASTER) {
-                               netdev_warn(bp->dev, "Firmware recover via OP-TEE requested\n");
-                               rc = bnxt_fw_reset_via_optee(bp);
-                               if (!rc)
-                                       rc = bnxt_hwrm_ver_get(bp);
-                       }
-               }
+               rc = bnxt_try_recover_fw(bp);
+               if (rc)
+                       return rc;
+               rc = bnxt_hwrm_ver_get(bp);
                if (rc)
                        return rc;
        }
@@ -11415,6 +11520,12 @@ static void bnxt_reset_all(struct bnxt *bp)
        bp->fw_reset_timestamp = jiffies;
 }
 
+static bool bnxt_fw_reset_timeout(struct bnxt *bp)
+{
+       return time_after(jiffies, bp->fw_reset_timestamp +
+                         (bp->fw_reset_max_dsecs * HZ / 10));
+}
+
 static void bnxt_fw_reset_task(struct work_struct *work)
 {
        struct bnxt *bp = container_of(work, struct bnxt, fw_reset_task.work);
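
bnxt_fw_reset_timeout() centralizes the deadline test that the three hunks below previously open-coded. fw_reset_max_dsecs is in deciseconds and one decisecond is HZ/10 jiffies, so the window past fw_reset_timestamp is max_dsecs * HZ / 10 ticks. A quick standalone check of the arithmetic, with HZ and the decisecond count as example values only:

#include <stdio.h>

int main(void)
{
        unsigned int hz = 250;          /* example CONFIG_HZ */
        unsigned int max_dsecs = 60;    /* 6 seconds, example firmware value */

        /* 60 dsec * 250 ticks/s / 10 dsec/s = 1500 jiffies */
        printf("window = %u jiffies (%u ms)\n",
               max_dsecs * hz / 10, max_dsecs * 100);
        return 0;
}
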
@@ -11436,8 +11547,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
                                   bp->fw_reset_timestamp));
                        goto fw_reset_abort;
                } else if (n > 0) {
-                       if (time_after(jiffies, bp->fw_reset_timestamp +
-                                      (bp->fw_reset_max_dsecs * HZ / 10))) {
+                       if (bnxt_fw_reset_timeout(bp)) {
                                clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
                                bp->fw_reset_state = 0;
                                netdev_err(bp->dev, "Firmware reset aborted, bnxt_get_registered_vfs() returns %d\n",
@@ -11466,8 +11576,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
 
                val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
                if (!(val & BNXT_FW_STATUS_SHUTDOWN) &&
-                   !time_after(jiffies, bp->fw_reset_timestamp +
-                   (bp->fw_reset_max_dsecs * HZ / 10))) {
+                   !bnxt_fw_reset_timeout(bp)) {
                        bnxt_queue_fw_reset_work(bp, HZ / 5);
                        return;
                }
@@ -11509,8 +11618,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
                bp->hwrm_cmd_timeout = SHORT_HWRM_CMD_TIMEOUT;
                rc = __bnxt_hwrm_ver_get(bp, true);
                if (rc) {
-                       if (time_after(jiffies, bp->fw_reset_timestamp +
-                                      (bp->fw_reset_max_dsecs * HZ / 10))) {
+                       if (bnxt_fw_reset_timeout(bp)) {
                                netdev_err(bp->dev, "Firmware reset aborted\n");
                                goto fw_reset_abort_status;
                        }
@@ -12542,9 +12650,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        dev->ethtool_ops = &bnxt_ethtool_ops;
        pci_set_drvdata(pdev, dev);
 
-       if (BNXT_PF(bp))
-               bnxt_vpd_read_info(bp);
-
        rc = bnxt_alloc_hwrm_resources(bp);
        if (rc)
                goto init_err_pci_clean;
@@ -12556,6 +12661,9 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (rc)
                goto init_err_pci_clean;
 
+       if (BNXT_PF(bp))
+               bnxt_vpd_read_info(bp);
+
        if (BNXT_CHIP_P5(bp)) {
                bp->flags |= BNXT_FLAG_CHIP_P5;
                if (BNXT_CHIP_SR2(bp))
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 51996c8..4ef6888 100644
@@ -656,6 +656,7 @@ struct nqe_cn {
 #define BNXT_HWRM_MAX_REQ_LEN          (bp->hwrm_max_req_len)
 #define BNXT_HWRM_SHORT_REQ_LEN                sizeof(struct hwrm_short_input)
 #define DFLT_HWRM_CMD_TIMEOUT          500
+#define HWRM_CMD_MAX_TIMEOUT           40000
 #define SHORT_HWRM_CMD_TIMEOUT         20
 #define HWRM_CMD_TIMEOUT               (bp->hwrm_cmd_timeout)
 #define HWRM_RESET_TIMEOUT             ((HWRM_CMD_TIMEOUT) * 4)
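
The new HWRM_CMD_MAX_TIMEOUT pairs with the clamp added to bnxt_hwrm_do_send_msg() earlier in this diff: HWRM timeouts are in milliseconds, default 500, now capped at 40000 (40 s) before being converted to microseconds. A standalone sketch of that derivation using the values above:

#include <stdio.h>

#define DFLT_HWRM_CMD_TIMEOUT   500     /* ms */
#define HWRM_CMD_MAX_TIMEOUT    40000   /* ms */

static unsigned int hwrm_timeout_usec(unsigned int timeout_ms)
{
        if (!timeout_ms)
                timeout_ms = DFLT_HWRM_CMD_TIMEOUT;
        if (timeout_ms > HWRM_CMD_MAX_TIMEOUT)
                timeout_ms = HWRM_CMD_MAX_TIMEOUT;
        return timeout_ms * 1000;       /* convert to usec */
}

int main(void)
{
        printf("%u usec\n", hwrm_timeout_usec(0));      /* 500000 */
        printf("%u usec\n", hwrm_timeout_usec(90000));  /* clamped: 40000000 */
        return 0;
}
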
@@ -1345,9 +1346,14 @@ struct bnxt_test_info {
 #define BNXT_CAG_REG_LEGACY_INT_STATUS         0x4014
 #define BNXT_CAG_REG_BASE                      0x300000
 
+#define BNXT_GRC_REG_STATUS_P5                 0x520
+
 #define BNXT_GRCPF_REG_KONG_COMM               0xA00
 #define BNXT_GRCPF_REG_KONG_COMM_TRIGGER       0xB00
 
+#define BNXT_GRC_REG_CHIP_NUM                  0x48
+#define BNXT_GRC_REG_BASE                      0x260000
+
 #define BNXT_GRC_BASE_MASK                     0xfffff000
 #define BNXT_GRC_OFFSET_MASK                   0x00000ffc
 
@@ -1441,6 +1447,8 @@ struct bnxt_ctx_pg_info {
 #define BNXT_MAX_TQM_RINGS             \
        (BNXT_MAX_TQM_SP_RINGS + BNXT_MAX_TQM_FP_RINGS)
 
+#define BNXT_BACKING_STORE_CFG_LEGACY_LEN      256
+
 struct bnxt_ctx_mem_info {
        u32     qp_max_entries;
        u16     qp_min_qp1_entries;
@@ -1532,9 +1540,22 @@ struct bnxt_fw_reporter_ctx {
 #define BNXT_FW_HEALTH_WIN_OFF(reg)    (BNXT_FW_HEALTH_WIN_BASE +      \
                                         ((reg) & BNXT_GRC_OFFSET_MASK))
 
+#define BNXT_FW_STATUS_HEALTH_MSK      0xffff
 #define BNXT_FW_STATUS_HEALTHY         0x8000
 #define BNXT_FW_STATUS_SHUTDOWN                0x100000
 
+#define BNXT_FW_IS_HEALTHY(sts)                (((sts) & BNXT_FW_STATUS_HEALTH_MSK) ==\
+                                        BNXT_FW_STATUS_HEALTHY)
+
+#define BNXT_FW_IS_BOOTING(sts)                (((sts) & BNXT_FW_STATUS_HEALTH_MSK) < \
+                                        BNXT_FW_STATUS_HEALTHY)
+
+#define BNXT_FW_IS_ERR(sts)            (((sts) & BNXT_FW_STATUS_HEALTH_MSK) > \
+                                        BNXT_FW_STATUS_HEALTHY)
+
+#define BNXT_FW_RETRY                  5
+#define BNXT_FW_IF_RETRY               10
+
 struct bnxt {
        void __iomem            *bar0;
        void __iomem            *bar1;
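
The BNXT_FW_IS_* macros classify the low 16 bits of the firmware status word against the 0x8000 HEALTHY code: anything below is still booting, anything above is an error, and high bits such as BNXT_FW_STATUS_SHUTDOWN do not affect the health field. A standalone check of the classification; the status words fed in are example inputs and the defines are copied from the hunk above:

#include <stdio.h>

#define BNXT_FW_STATUS_HEALTH_MSK 0xffff
#define BNXT_FW_STATUS_HEALTHY    0x8000

#define BNXT_FW_IS_HEALTHY(sts) (((sts) & BNXT_FW_STATUS_HEALTH_MSK) == BNXT_FW_STATUS_HEALTHY)
#define BNXT_FW_IS_BOOTING(sts) (((sts) & BNXT_FW_STATUS_HEALTH_MSK) <  BNXT_FW_STATUS_HEALTHY)
#define BNXT_FW_IS_ERR(sts)     (((sts) & BNXT_FW_STATUS_HEALTH_MSK) >  BNXT_FW_STATUS_HEALTHY)

int main(void)
{
        /* 0x108000 has the SHUTDOWN bit set but a HEALTHY health field. */
        unsigned int sts[] = { 0x0000, 0x8000, 0x8001, 0x108000 };

        for (unsigned int i = 0; i < sizeof(sts) / sizeof(sts[0]); i++)
                printf("0x%06x: healthy=%d booting=%d err=%d\n", sts[i],
                       BNXT_FW_IS_HEALTHY(sts[i]), BNXT_FW_IS_BOOTING(sts[i]),
                       BNXT_FW_IS_ERR(sts[i]));
        return 0;
}
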
@@ -1788,6 +1809,7 @@ struct bnxt {
 #define BNXT_STATE_FW_FATAL_COND       6
 #define BNXT_STATE_DRV_REGISTERED      7
 #define BNXT_STATE_PCI_CHANNEL_IO_FROZEN       8
+#define BNXT_STATE_NAPI_DISABLED       9
 
 #define BNXT_NO_FW_ACCESS(bp)                                  \
        (test_bit(BNXT_STATE_FW_FATAL_COND, &(bp)->state) ||    \
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index 6b7b69e..90a31b4 100644
@@ -44,21 +44,20 @@ static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
                                     struct netlink_ext_ack *extack)
 {
        struct bnxt *bp = devlink_health_reporter_priv(reporter);
-       u32 val, health_status;
+       u32 val;
        int rc;
 
        if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
                return 0;
 
        val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
-       health_status = val & 0xffff;
 
-       if (health_status < BNXT_FW_STATUS_HEALTHY) {
+       if (BNXT_FW_IS_BOOTING(val)) {
                rc = devlink_fmsg_string_pair_put(fmsg, "Description",
                                                  "Not yet completed initialization");
                if (rc)
                        return rc;
-       } else if (health_status > BNXT_FW_STATUS_HEALTHY) {
+       } else if (BNXT_FW_IS_ERR(val)) {
                rc = devlink_fmsg_string_pair_put(fmsg, "Description",
                                                  "Encountered fatal error and cannot recover");
                if (rc)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
index 2d3e962..d5c6e6a 100644
@@ -2,7 +2,7 @@
  *
  * Copyright (c) 2014-2016 Broadcom Corporation
  * Copyright (c) 2014-2018 Broadcom Limited
- * Copyright (c) 2018-2020 Broadcom Inc.
+ * Copyright (c) 2018-2021 Broadcom Inc.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -164,6 +164,7 @@ struct cmd_nums {
        #define HWRM_VNIC_PLCMODES_CFG                    0x48UL
        #define HWRM_VNIC_PLCMODES_QCFG                   0x49UL
        #define HWRM_VNIC_QCAPS                           0x4aUL
+       #define HWRM_VNIC_UPDATE                          0x4bUL
        #define HWRM_RING_ALLOC                           0x50UL
        #define HWRM_RING_FREE                            0x51UL
        #define HWRM_RING_CMPL_RING_QAGGINT_PARAMS        0x52UL
@@ -184,6 +185,9 @@ struct cmd_nums {
        #define HWRM_QUEUE_MPLS_QCAPS                     0x80UL
        #define HWRM_QUEUE_MPLSTC2PRI_QCFG                0x81UL
        #define HWRM_QUEUE_MPLSTC2PRI_CFG                 0x82UL
+       #define HWRM_QUEUE_VLANPRI_QCAPS                  0x83UL
+       #define HWRM_QUEUE_VLANPRI2PRI_QCFG               0x84UL
+       #define HWRM_QUEUE_VLANPRI2PRI_CFG                0x85UL
        #define HWRM_CFA_L2_FILTER_ALLOC                  0x90UL
        #define HWRM_CFA_L2_FILTER_FREE                   0x91UL
        #define HWRM_CFA_L2_FILTER_CFG                    0x92UL
@@ -217,6 +221,8 @@ struct cmd_nums {
        #define HWRM_PORT_TX_FIR_CFG                      0xbbUL
        #define HWRM_PORT_TX_FIR_QCFG                     0xbcUL
        #define HWRM_PORT_ECN_QSTATS                      0xbdUL
+       #define HWRM_FW_LIVEPATCH_QUERY                   0xbeUL
+       #define HWRM_FW_LIVEPATCH                         0xbfUL
        #define HWRM_FW_RESET                             0xc0UL
        #define HWRM_FW_QSTATUS                           0xc1UL
        #define HWRM_FW_HEALTH_CHECK                      0xc2UL
@@ -347,6 +353,8 @@ struct cmd_nums {
        #define HWRM_FUNC_HOST_PF_IDS_QUERY               0x197UL
        #define HWRM_FUNC_QSTATS_EXT                      0x198UL
        #define HWRM_STAT_EXT_CTX_QUERY                   0x199UL
+       #define HWRM_FUNC_SPD_CFG                         0x19aUL
+       #define HWRM_FUNC_SPD_QCFG                        0x19bUL
        #define HWRM_SELFTEST_QLIST                       0x200UL
        #define HWRM_SELFTEST_EXEC                        0x201UL
        #define HWRM_SELFTEST_IRQ                         0x202UL
@@ -359,6 +367,11 @@ struct cmd_nums {
        #define HWRM_MFG_HDMA_TEST                        0x209UL
        #define HWRM_MFG_FRU_EEPROM_WRITE                 0x20aUL
        #define HWRM_MFG_FRU_EEPROM_READ                  0x20bUL
+       #define HWRM_MFG_SOC_IMAGE                        0x20cUL
+       #define HWRM_MFG_SOC_QSTATUS                      0x20dUL
+       #define HWRM_MFG_PARAM_SEEPROM_SYNC               0x20eUL
+       #define HWRM_MFG_PARAM_SEEPROM_READ               0x20fUL
+       #define HWRM_MFG_PARAM_SEEPROM_HEALTH             0x210UL
        #define HWRM_TF                                   0x2bcUL
        #define HWRM_TF_VERSION_GET                       0x2bdUL
        #define HWRM_TF_SESSION_OPEN                      0x2c6UL
@@ -384,6 +397,7 @@ struct cmd_nums {
        #define HWRM_TF_EXT_EM_QCFG                       0x2e9UL
        #define HWRM_TF_EM_INSERT                         0x2eaUL
        #define HWRM_TF_EM_DELETE                         0x2ebUL
+       #define HWRM_TF_EM_HASH_INSERT                    0x2ecUL
        #define HWRM_TF_TCAM_SET                          0x2f8UL
        #define HWRM_TF_TCAM_GET                          0x2f9UL
        #define HWRM_TF_TCAM_MOVE                         0x2faUL
@@ -486,9 +500,9 @@ struct hwrm_err_output {
 #define HWRM_TARGET_ID_TOOLS 0xFFFD
 #define HWRM_VERSION_MAJOR 1
 #define HWRM_VERSION_MINOR 10
-#define HWRM_VERSION_UPDATE 1
-#define HWRM_VERSION_RSVD 68
-#define HWRM_VERSION_STR "1.10.1.68"
+#define HWRM_VERSION_UPDATE 2
+#define HWRM_VERSION_RSVD 11
+#define HWRM_VERSION_STR "1.10.2.11"
 
 /* hwrm_ver_get_input (size:192b/24B) */
 struct hwrm_ver_get_input {
@@ -563,8 +577,9 @@ struct hwrm_ver_get_output {
        __le16  max_resp_len;
        __le16  def_req_timeout;
        u8      flags;
-       #define VER_GET_RESP_FLAGS_DEV_NOT_RDY       0x1UL
-       #define VER_GET_RESP_FLAGS_EXT_VER_AVAIL     0x2UL
+       #define VER_GET_RESP_FLAGS_DEV_NOT_RDY                   0x1UL
+       #define VER_GET_RESP_FLAGS_EXT_VER_AVAIL                 0x2UL
+       #define VER_GET_RESP_FLAGS_DEV_NOT_RDY_BACKING_STORE     0x4UL
        u8      unused_0[2];
        u8      always_1;
        __le16  hwrm_intf_major;
@@ -708,6 +723,7 @@ struct hwrm_async_event_cmpl {
        #define ASYNC_EVENT_CMPL_EVENT_ID_QUIESCE_DONE               0x3fUL
        #define ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE          0x40UL
        #define ASYNC_EVENT_CMPL_EVENT_ID_PFC_WATCHDOG_CFG_CHANGE    0x41UL
+       #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID          0x42UL
        #define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG               0xfeUL
        #define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR                 0xffUL
        #define ASYNC_EVENT_CMPL_EVENT_ID_LAST                      ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR
@@ -815,6 +831,8 @@ struct hwrm_async_event_cmpl_reset_notify {
        #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_ID_RESET_NOTIFY 0x8UL
        #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_ID_LAST        ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_ID_RESET_NOTIFY
        __le32  event_data2;
+       #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA2_FW_STATUS_CODE_MASK 0xffffUL
+       #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA2_FW_STATUS_CODE_SFT 0
        u8      opaque_v;
        #define ASYNC_EVENT_CMPL_RESET_NOTIFY_V          0x1UL
        #define ASYNC_EVENT_CMPL_RESET_NOTIFY_OPAQUE_MASK 0xfeUL
@@ -832,7 +850,8 @@ struct hwrm_async_event_cmpl_reset_notify {
        #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MANAGEMENT_RESET_REQUEST  (0x1UL << 8)
        #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL        (0x2UL << 8)
        #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_NON_FATAL    (0x3UL << 8)
-       #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_LAST                     ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_NON_FATAL
+       #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FAST_RESET                (0x4UL << 8)
+       #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_LAST                     ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FAST_RESET
        #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_MASK           0xffff0000UL
        #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_SFT            16
 };
@@ -1271,6 +1290,10 @@ struct hwrm_func_qcaps_output {
        #define FUNC_QCAPS_RESP_FLAGS_EXT_TX_PROXY_SRC_INTF_OVERRIDE_SUPPORT     0x20UL
        #define FUNC_QCAPS_RESP_FLAGS_EXT_SCHQ_SUPPORTED                         0x40UL
        #define FUNC_QCAPS_RESP_FLAGS_EXT_PPP_PUSH_MODE_SUPPORTED                0x80UL
+       #define FUNC_QCAPS_RESP_FLAGS_EXT_EVB_MODE_CFG_NOT_SUPPORTED             0x100UL
+       #define FUNC_QCAPS_RESP_FLAGS_EXT_SOC_SPD_SUPPORTED                      0x200UL
+       #define FUNC_QCAPS_RESP_FLAGS_EXT_FW_LIVEPATCH_SUPPORTED                 0x400UL
+       #define FUNC_QCAPS_RESP_FLAGS_EXT_FAST_RESET_CAPABLE                     0x800UL
        u8      max_schqs;
        u8      mpc_chnls_cap;
        #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_TCE         0x1UL
@@ -1315,6 +1338,7 @@ struct hwrm_func_qcfg_output {
        #define FUNC_QCFG_RESP_FLAGS_HOT_RESET_ALLOWED            0x200UL
        #define FUNC_QCFG_RESP_FLAGS_PPP_PUSH_MODE_ENABLED        0x400UL
        #define FUNC_QCFG_RESP_FLAGS_RING_MONITOR_ENABLED         0x800UL
+       #define FUNC_QCFG_RESP_FLAGS_FAST_RESET_ALLOWED           0x1000UL
        u8      mac_address[6];
        __le16  pci_id;
        __le16  alloc_rsscos_ctx;
@@ -1731,6 +1755,7 @@ struct hwrm_func_drv_rgtr_input {
        #define FUNC_DRV_RGTR_REQ_FLAGS_HOT_RESET_SUPPORT          0x10UL
        #define FUNC_DRV_RGTR_REQ_FLAGS_ERROR_RECOVERY_SUPPORT     0x20UL
        #define FUNC_DRV_RGTR_REQ_FLAGS_MASTER_SUPPORT             0x40UL
+       #define FUNC_DRV_RGTR_REQ_FLAGS_FAST_RESET_SUPPORT         0x80UL
        __le32  enables;
        #define FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE             0x1UL
        #define FUNC_DRV_RGTR_REQ_ENABLES_VER                 0x2UL
@@ -1993,7 +2018,7 @@ struct hwrm_func_backing_store_qcaps_input {
        __le64  resp_addr;
 };
 
-/* hwrm_func_backing_store_qcaps_output (size:640b/80B) */
+/* hwrm_func_backing_store_qcaps_output (size:704b/88B) */
 struct hwrm_func_backing_store_qcaps_output {
        __le16  error_code;
        __le16  req_type;
@@ -2024,13 +2049,25 @@ struct hwrm_func_backing_store_qcaps_output {
        __le16  mrav_num_entries_units;
        u8      tqm_entries_multiple;
        u8      ctx_kind_initializer;
-       __le32  rsvd;
-       __le16  rsvd1;
+       __le16  ctx_init_mask;
+       #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_QP       0x1UL
+       #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_SRQ      0x2UL
+       #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_CQ       0x4UL
+       #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_VNIC     0x8UL
+       #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_STAT     0x10UL
+       #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_MRAV     0x20UL
+       u8      qp_init_offset;
+       u8      srq_init_offset;
+       u8      cq_init_offset;
+       u8      vnic_init_offset;
        u8      tqm_fp_rings_count;
+       u8      stat_init_offset;
+       u8      mrav_init_offset;
+       u8      rsvd[6];
        u8      valid;
 };
 
-/* hwrm_func_backing_store_cfg_input (size:2048b/256B) */
+/* hwrm_func_backing_store_cfg_input (size:2432b/304B) */
 struct hwrm_func_backing_store_cfg_input {
        __le16  req_type;
        __le16  cmpl_ring;
@@ -2041,22 +2078,25 @@ struct hwrm_func_backing_store_cfg_input {
        #define FUNC_BACKING_STORE_CFG_REQ_FLAGS_PREBOOT_MODE               0x1UL
        #define FUNC_BACKING_STORE_CFG_REQ_FLAGS_MRAV_RESERVATION_SPLIT     0x2UL
        __le32  enables;
-       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_QP            0x1UL
-       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_SRQ           0x2UL
-       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_CQ            0x4UL
-       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_VNIC          0x8UL
-       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_STAT          0x10UL
-       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP        0x20UL
-       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING0     0x40UL
-       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING1     0x80UL
-       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING2     0x100UL
-       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING3     0x200UL
-       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING4     0x400UL
-       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING5     0x800UL
-       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING6     0x1000UL
-       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING7     0x2000UL
-       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV          0x4000UL
-       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM           0x8000UL
+       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_QP             0x1UL
+       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_SRQ            0x2UL
+       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_CQ             0x4UL
+       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_VNIC           0x8UL
+       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_STAT           0x10UL
+       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP         0x20UL
+       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING0      0x40UL
+       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING1      0x80UL
+       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING2      0x100UL
+       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING3      0x200UL
+       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING4      0x400UL
+       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING5      0x800UL
+       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING6      0x1000UL
+       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING7      0x2000UL
+       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV           0x4000UL
+       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM            0x8000UL
+       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING8      0x10000UL
+       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING9      0x20000UL
+       #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING10     0x40000UL
        u8      qpc_pg_size_qpc_lvl;
        #define FUNC_BACKING_STORE_CFG_REQ_QPC_LVL_MASK      0xfUL
        #define FUNC_BACKING_STORE_CFG_REQ_QPC_LVL_SFT       0
@@ -2358,6 +2398,63 @@ struct hwrm_func_backing_store_cfg_input {
        __le16  tqm_entry_size;
        __le16  mrav_entry_size;
        __le16  tim_entry_size;
+       u8      tqm_ring8_pg_size_tqm_ring_lvl;
+       #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_MASK      0xfUL
+       #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_SFT       0
+       #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LVL_0       0x0UL
+       #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LVL_1       0x1UL
+       #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LVL_2       0x2UL
+       #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LAST       FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LVL_2
+       #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_MASK  0xf0UL
+       #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_SFT   4
+       #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_4K   (0x0UL << 4)
+       #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_8K   (0x1UL << 4)
+       #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_64K  (0x2UL << 4)
+       #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_2M   (0x3UL << 4)
+       #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_8M   (0x4UL << 4)
+       #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_1G   (0x5UL << 4)
+       #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_LAST   FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_1G
+       u8      ring8_unused[3];
+       __le32  tqm_ring8_num_entries;
+       __le64  tqm_ring8_page_dir;
+       u8      tqm_ring9_pg_size_tqm_ring_lvl;
+       #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_MASK      0xfUL
+       #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_SFT       0
+       #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LVL_0       0x0UL
+       #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LVL_1       0x1UL
+       #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LVL_2       0x2UL
+       #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LAST       FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LVL_2
+       #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_MASK  0xf0UL
+       #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_SFT   4
+       #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_4K   (0x0UL << 4)
+       #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_8K   (0x1UL << 4)
+       #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_64K  (0x2UL << 4)
+       #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_2M   (0x3UL << 4)
+       #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_8M   (0x4UL << 4)
+       #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_1G   (0x5UL << 4)
+       #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_LAST   FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_1G
+       u8      ring9_unused[3];
+       __le32  tqm_ring9_num_entries;
+       __le64  tqm_ring9_page_dir;
+       u8      tqm_ring10_pg_size_tqm_ring_lvl;
+       #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_MASK      0xfUL
+       #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_SFT       0
+       #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LVL_0       0x0UL
+       #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LVL_1       0x1UL
+       #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LVL_2       0x2UL
+       #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LAST       FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LVL_2
+       #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_MASK  0xf0UL
+       #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_SFT   4
+       #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_4K   (0x0UL << 4)
+       #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_8K   (0x1UL << 4)
+       #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_64K  (0x2UL << 4)
+       #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_2M   (0x3UL << 4)
+       #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_8M   (0x4UL << 4)
+       #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_1G   (0x5UL << 4)
+       #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_LAST   FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_1G
+       u8      ring10_unused[3];
+       __le32  tqm_ring10_num_entries;
+       __le64  tqm_ring10_page_dir;
 };
 
 /* hwrm_func_backing_store_cfg_output (size:128b/16B) */
@@ -2930,6 +3027,7 @@ struct hwrm_port_phy_qcfg_output {
        #define PORT_PHY_QCFG_RESP_DUPLEX_STATE_LAST PORT_PHY_QCFG_RESP_DUPLEX_STATE_FULL
        u8      option_flags;
        #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_MEDIA_AUTO_DETECT     0x1UL
+       #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_SIGNAL_MODE_KNOWN     0x2UL
        char    phy_vendor_name[16];
        char    phy_vendor_partnumber[16];
        __le16  support_pam4_speeds;
@@ -3528,8 +3626,8 @@ struct hwrm_port_phy_qcaps_output {
        #define PORT_PHY_QCAPS_RESP_FLAGS_SHARED_PHY_CFG_SUPPORTED         0x8UL
        #define PORT_PHY_QCAPS_RESP_FLAGS_CUMULATIVE_COUNTERS_ON_RESET     0x10UL
        #define PORT_PHY_QCAPS_RESP_FLAGS_LOCAL_LPBK_NOT_SUPPORTED         0x20UL
-       #define PORT_PHY_QCAPS_RESP_FLAGS_RSVD1_MASK                       0xc0UL
-       #define PORT_PHY_QCAPS_RESP_FLAGS_RSVD1_SFT                        6
+       #define PORT_PHY_QCAPS_RESP_FLAGS_FW_MANAGED_LINK_DOWN             0x40UL
+       #define PORT_PHY_QCAPS_RESP_FLAGS_RSVD1                            0x80UL
        u8      port_cnt;
        #define PORT_PHY_QCAPS_RESP_PORT_CNT_UNKNOWN 0x0UL
        #define PORT_PHY_QCAPS_RESP_PORT_CNT_1       0x1UL
@@ -4119,7 +4217,10 @@ struct hwrm_queue_qportcfg_output {
        #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS_NIC   0x3UL
        #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN        0xffUL
        #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LAST          QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN
-       u8      unused_0;
+       u8      queue_id0_service_profile_type;
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_TYPE_ROCE     0x1UL
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_TYPE_NIC      0x2UL
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_TYPE_CNP      0x4UL
        char    qid0_name[16];
        char    qid1_name[16];
        char    qid2_name[16];
@@ -4128,7 +4229,34 @@ struct hwrm_queue_qportcfg_output {
        char    qid5_name[16];
        char    qid6_name[16];
        char    qid7_name[16];
-       u8      unused_1[7];
+       u8      queue_id1_service_profile_type;
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_TYPE_ROCE     0x1UL
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_TYPE_NIC      0x2UL
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_TYPE_CNP      0x4UL
+       u8      queue_id2_service_profile_type;
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_TYPE_ROCE     0x1UL
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_TYPE_NIC      0x2UL
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_TYPE_CNP      0x4UL
+       u8      queue_id3_service_profile_type;
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_TYPE_ROCE     0x1UL
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_TYPE_NIC      0x2UL
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_TYPE_CNP      0x4UL
+       u8      queue_id4_service_profile_type;
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_TYPE_ROCE     0x1UL
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_TYPE_NIC      0x2UL
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_TYPE_CNP      0x4UL
+       u8      queue_id5_service_profile_type;
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_TYPE_ROCE     0x1UL
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_TYPE_NIC      0x2UL
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_TYPE_CNP      0x4UL
+       u8      queue_id6_service_profile_type;
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_TYPE_ROCE     0x1UL
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_TYPE_NIC      0x2UL
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_TYPE_CNP      0x4UL
+       u8      queue_id7_service_profile_type;
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_TYPE_ROCE     0x1UL
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_TYPE_NIC      0x2UL
+       #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_TYPE_CNP      0x4UL
        u8      valid;
 };
 
@@ -5142,8 +5270,10 @@ struct hwrm_vnic_alloc_input {
        __le16  target_id;
        __le64  resp_addr;
        __le32  flags;
-       #define VNIC_ALLOC_REQ_FLAGS_DEFAULT     0x1UL
-       u8      unused_0[4];
+       #define VNIC_ALLOC_REQ_FLAGS_DEFAULT                  0x1UL
+       #define VNIC_ALLOC_REQ_FLAGS_VIRTIO_NET_FID_VALID     0x2UL
+       __le16  virtio_net_fid;
+       u8      unused_0[2];
 };
 
 /* hwrm_vnic_alloc_output (size:128b/16B) */
@@ -5260,6 +5390,8 @@ struct hwrm_vnic_qcaps_output {
        #define VNIC_QCAPS_RESP_FLAGS_OUTERMOST_RSS_CAP                   0x80UL
        #define VNIC_QCAPS_RESP_FLAGS_COS_ASSIGNMENT_CAP                  0x100UL
        #define VNIC_QCAPS_RESP_FLAGS_RX_CMPL_V2_CAP                      0x200UL
+       #define VNIC_QCAPS_RESP_FLAGS_VNIC_STATE_CAP                      0x400UL
+       #define VNIC_QCAPS_RESP_FLAGS_VIRTIO_NET_VNIC_ALLOC_CAP           0x800UL
        __le16  max_aggs_supported;
        u8      unused_1[5];
        u8      valid;
@@ -5585,7 +5717,11 @@ struct hwrm_ring_alloc_output {
        __le16  resp_len;
        __le16  ring_id;
        __le16  logical_ring_id;
-       u8      unused_0[3];
+       u8      push_buffer_index;
+       #define RING_ALLOC_RESP_PUSH_BUFFER_INDEX_PING_BUFFER 0x0UL
+       #define RING_ALLOC_RESP_PUSH_BUFFER_INDEX_PONG_BUFFER 0x1UL
+       #define RING_ALLOC_RESP_PUSH_BUFFER_INDEX_LAST       RING_ALLOC_RESP_PUSH_BUFFER_INDEX_PONG_BUFFER
+       u8      unused_0[2];
        u8      valid;
 };
 
@@ -5644,7 +5780,11 @@ struct hwrm_ring_reset_output {
        __le16  req_type;
        __le16  seq_id;
        __le16  resp_len;
-       u8      unused_0[4];
+       u8      push_buffer_index;
+       #define RING_RESET_RESP_PUSH_BUFFER_INDEX_PING_BUFFER 0x0UL
+       #define RING_RESET_RESP_PUSH_BUFFER_INDEX_PONG_BUFFER 0x1UL
+       #define RING_RESET_RESP_PUSH_BUFFER_INDEX_LAST       RING_RESET_RESP_PUSH_BUFFER_INDEX_PONG_BUFFER
+       u8      unused_0[3];
        u8      consumer_idx[3];
        u8      valid;
 };
@@ -6988,21 +7128,23 @@ struct hwrm_cfa_adv_flow_mgnt_qcaps_output {
        __le16  seq_id;
        __le16  resp_len;
        __le32  flags;
-       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_HND_16BIT_SUPPORTED                  0x1UL
-       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_HND_64BIT_SUPPORTED                  0x2UL
-       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_BATCH_DELETE_SUPPORTED               0x4UL
-       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_RESET_ALL_SUPPORTED                  0x8UL
-       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_DEST_FUNC_SUPPORTED           0x10UL
-       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_TX_EEM_FLOW_SUPPORTED                     0x20UL
-       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RX_EEM_FLOW_SUPPORTED                     0x40UL
-       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_COUNTER_ALLOC_SUPPORTED              0x80UL
-       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_SUPPORTED                0x100UL
-       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_UNTAGGED_VLAN_SUPPORTED                   0x200UL
-       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_XDP_SUPPORTED                             0x400UL
-       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_L2_HEADER_SOURCE_FIELDS_SUPPORTED         0x800UL
-       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_RX_ARP_SUPPORTED              0x1000UL
-       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_V2_SUPPORTED             0x2000UL
-       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_RX_ETHERTYPE_IP_SUPPORTED     0x4000UL
+       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_HND_16BIT_SUPPORTED                     0x1UL
+       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_HND_64BIT_SUPPORTED                     0x2UL
+       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_BATCH_DELETE_SUPPORTED                  0x4UL
+       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_RESET_ALL_SUPPORTED                     0x8UL
+       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_DEST_FUNC_SUPPORTED              0x10UL
+       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_TX_EEM_FLOW_SUPPORTED                        0x20UL
+       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RX_EEM_FLOW_SUPPORTED                        0x40UL
+       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_COUNTER_ALLOC_SUPPORTED                 0x80UL
+       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_SUPPORTED                   0x100UL
+       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_UNTAGGED_VLAN_SUPPORTED                      0x200UL
+       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_XDP_SUPPORTED                                0x400UL
+       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_L2_HEADER_SOURCE_FIELDS_SUPPORTED            0x800UL
+       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_RX_ARP_SUPPORTED                 0x1000UL
+       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_V2_SUPPORTED                0x2000UL
+       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_RX_ETHERTYPE_IP_SUPPORTED        0x4000UL
+       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_TRUFLOW_CAPABLE                              0x8000UL
+       #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_L2_FILTER_TRAFFIC_TYPE_L2_ROCE_SUPPORTED     0x10000UL
        u8      unused_0[3];
        u8      valid;
 };
@@ -7472,7 +7614,8 @@ struct hwrm_struct_hdr {
        #define STRUCT_HDR_STRUCT_ID_AFM_OPAQUE         0x1UL
        #define STRUCT_HDR_STRUCT_ID_PORT_DESCRIPTION   0xaUL
        #define STRUCT_HDR_STRUCT_ID_RSS_V2             0x64UL
-       #define STRUCT_HDR_STRUCT_ID_LAST              STRUCT_HDR_STRUCT_ID_RSS_V2
+       #define STRUCT_HDR_STRUCT_ID_MSIX_PER_VF        0xc8UL
+       #define STRUCT_HDR_STRUCT_ID_LAST              STRUCT_HDR_STRUCT_ID_MSIX_PER_VF
        __le16  len;
        u8      version;
        u8      count;
@@ -8000,6 +8143,9 @@ struct hwrm_dbg_coredump_initiate_output {
 struct coredump_data_hdr {
        __le32  address;
        __le32  flags_length;
+       #define COREDUMP_DATA_HDR_FLAGS_LENGTH_ACTUAL_LEN_MASK     0xffffffUL
+       #define COREDUMP_DATA_HDR_FLAGS_LENGTH_ACTUAL_LEN_SFT      0
+       #define COREDUMP_DATA_HDR_FLAGS_LENGTH_INDIRECT_ACCESS     0x1000000UL
        __le32  instance;
        __le32  next_offset;
 };
@@ -8669,7 +8815,6 @@ struct hcomm_status {
        #define HCOMM_STATUS_TRUE_OFFSET_MASK        0xfffffffcUL
        #define HCOMM_STATUS_TRUE_OFFSET_SFT         2
 };
-
 #define HCOMM_STATUS_STRUCT_LOC 0x31001F0UL
 
 #endif /* _BNXT_HSI_H_ */
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 5143cdd..8936c2b 100644
@@ -12826,11 +12826,13 @@ static __be32 *tg3_vpd_readblock(struct tg3 *tp, u32 *vpdlen)
 
                        offset = tg3_nvram_logical_addr(tp, offset);
                }
-       }
 
-       if (!offset || !len) {
-               offset = TG3_NVM_VPD_OFF;
-               len = TG3_NVM_VPD_LEN;
+               if (!offset || !len) {
+                       offset = TG3_NVM_VPD_OFF;
+                       len = TG3_NVM_VPD_LEN;
+               }
+       } else {
+               len = TG3_NVM_PCI_VPD_MAX_LEN;
        }
 
        buf = kmalloc(len, GFP_KERNEL);
@@ -12846,26 +12848,16 @@ static __be32 *tg3_vpd_readblock(struct tg3 *tp, u32 *vpdlen)
                        if (tg3_nvram_read_be32(tp, offset + i, &buf[i/4]))
                                goto error;
                }
+               *vpdlen = len;
        } else {
-               u8 *ptr;
                ssize_t cnt;
-               unsigned int pos = 0;
-
-               ptr = (u8 *)&buf[0];
-               for (i = 0; pos < len && i < 3; i++, pos += cnt, ptr += cnt) {
-                       cnt = pci_read_vpd(tp->pdev, pos,
-                                          len - pos, ptr);
-                       if (cnt == -ETIMEDOUT || cnt == -EINTR)
-                               cnt = 0;
-                       else if (cnt < 0)
-                               goto error;
-               }
-               if (pos != len)
+
+               cnt = pci_read_vpd(tp->pdev, 0, len, (u8 *)buf);
+               if (cnt < 0)
                        goto error;
+               *vpdlen = cnt;
        }
 
-       *vpdlen = len;
-
        return buf;
 
 error:
diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index 1000c89..46ec4fd 100644
 /* Hardware Legacy NVRAM layout */
 #define TG3_NVM_VPD_OFF                        0x100
 #define TG3_NVM_VPD_LEN                        256
+#define TG3_NVM_PCI_VPD_MAX_LEN                512
 
 /* Hardware Selfboot NVRAM layout */
 #define TG3_NVM_HWSB_CFG1              0x00000004
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 814a5b1..472bf8f 100644
@@ -470,6 +470,10 @@ static void macb_set_tx_clk(struct macb *bp, int speed)
        if (!bp->tx_clk || (bp->caps & MACB_CAPS_CLK_HW_CHG))
                return;
 
+       /* In case of MII the PHY is the clock master */
+       if (bp->phy_interface == PHY_INTERFACE_MODE_MII)
+               return;
+
        switch (speed) {
        case SPEED_10:
                rate = 2500000;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 1554266..9f1965c 100644
@@ -5137,7 +5137,7 @@ static int adap_init0(struct adapter *adap, int vpd_skip)
 
        /* See if FW supports FW_FILTER2 work request */
        if (is_t4(adap->params.chip)) {
-               adap->params.filter2_wr_support = 0;
+               adap->params.filter2_wr_support = false;
        } else {
                params[0] = FW_PARAM_DEV(FILTER2_WR);
                ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 98d01a7..98829e4 100644
@@ -2689,7 +2689,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
 #define VPD_BASE           0x400
 #define VPD_BASE_OLD       0
 #define VPD_LEN            1024
-#define CHELSIO_VPD_UNIQUE_ID 0x82
 
 /**
  * t4_eeprom_ptov - translate a physical EEPROM address to virtual
@@ -2745,7 +2744,7 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p)
 {
        int i, ret = 0, addr;
        int ec, sn, pn, na;
-       u8 *vpd, csum;
+       u8 *vpd, csum, base_val = 0;
        unsigned int vpdr_len, kw_offset, id_len;
 
        vpd = vmalloc(VPD_LEN);
@@ -2755,17 +2754,11 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p)
        /* Card information normally starts at VPD_BASE but early cards had
         * it at 0.
         */
-       ret = pci_read_vpd(adapter->pdev, VPD_BASE, sizeof(u32), vpd);
+       ret = pci_read_vpd(adapter->pdev, VPD_BASE, 1, &base_val);
        if (ret < 0)
                goto out;
 
-       /* The VPD shall have a unique identifier specified by the PCI SIG.
-        * For chelsio adapters, the identifier is 0x82. The first byte of a VPD
-        * shall be CHELSIO_VPD_UNIQUE_ID (0x82). The VPD programming software
-        * is expected to automatically put this entry at the
-        * beginning of the VPD.
-        */
-       addr = *vpd == CHELSIO_VPD_UNIQUE_ID ? VPD_BASE : VPD_BASE_OLD;
+       addr = base_val == PCI_VPD_LRDT_ID_STRING ? VPD_BASE : VPD_BASE_OLD;
 
        ret = pci_read_vpd(adapter->pdev, addr, VPD_LEN, vpd);
        if (ret < 0)
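
The hunk above probes the first VPD byte against the generic PCI_VPD_LRDT_ID_STRING tag instead of the driver-private CHELSIO_VPD_UNIQUE_ID; per the removed comment both stand for the 0x82 "ID string" large-resource tag of the PCI VPD format, so reading a single byte is enough to decide where the VPD starts. A sketch of how that tag value is composed; the component defines are reproduced from the kernel's pci headers from memory, so treat them as assumptions:

#include <stdio.h>

#define PCI_VPD_LRDT            0x80    /* large-resource data type flag */
#define PCI_VPD_LTIN_ID_STRING  0x02    /* large-resource item: ID string */
#define PCI_VPD_LRDT_ID_STRING  (PCI_VPD_LRDT | PCI_VPD_LTIN_ID_STRING)

int main(void)
{
        printf("ID string tag = 0x%02x\n", PCI_VPD_LRDT_ID_STRING);    /* 0x82 */
        return 0;
}
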
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 6d853f0..ef4e2fe 100644
@@ -70,9 +70,32 @@ static struct {
 module_param_named(debug, debug.msg_enable, int, 0);
 MODULE_PARM_DESC(debug, "Debug verbosity level (0=none, ..., 0xffff=all)");
 
-static struct ucc_geth_info ugeth_primary_info = {
+static int ucc_geth_thread_count(enum ucc_geth_num_of_threads idx)
+{
+       static const u8 count[] = {
+               [UCC_GETH_NUM_OF_THREADS_1] = 1,
+               [UCC_GETH_NUM_OF_THREADS_2] = 2,
+               [UCC_GETH_NUM_OF_THREADS_4] = 4,
+               [UCC_GETH_NUM_OF_THREADS_6] = 6,
+               [UCC_GETH_NUM_OF_THREADS_8] = 8,
+       };
+       if (idx >= ARRAY_SIZE(count))
+               return 0;
+       return count[idx];
+}
+
+static inline int ucc_geth_tx_queues(const struct ucc_geth_info *info)
+{
+       return 1;
+}
+
+static inline int ucc_geth_rx_queues(const struct ucc_geth_info *info)
+{
+       return 1;
+}
+
+static const struct ucc_geth_info ugeth_primary_info = {
        .uf_info = {
-                   .bd_mem_part = MEM_PART_SYSTEM,
                    .rtsm = UCC_FAST_SEND_IDLES_BETWEEN_FRAMES,
                    .max_rx_buf_length = 1536,
                    /* adjusted at startup if max-speed 1000 */
@@ -90,8 +113,6 @@ static struct ucc_geth_info ugeth_primary_info = {
                    .tcrc = UCC_FAST_16_BIT_CRC,
                    .synl = UCC_FAST_SYNC_LEN_NOT_USED,
                    },
-       .numQueuesTx = 1,
-       .numQueuesRx = 1,
        .extendedFilteringChainPointer = ((uint32_t) NULL),
        .typeorlen = 3072 /*1536 */ ,
        .nonBackToBackIfgPart1 = 0x40,
@@ -157,8 +178,6 @@ static struct ucc_geth_info ugeth_primary_info = {
        .riscRx = QE_RISC_ALLOCATION_RISC1_AND_RISC2,
 };
 
-static struct ucc_geth_info ugeth_info[8];
-
 #ifdef DEBUG
 static void mem_disp(u8 *addr, int size)
 {
@@ -558,7 +577,7 @@ static void dump_bds(struct ucc_geth_private *ugeth)
        int i;
        int length;
 
-       for (i = 0; i < ugeth->ug_info->numQueuesTx; i++) {
+       for (i = 0; i < ucc_geth_tx_queues(ugeth->ug_info); i++) {
                if (ugeth->p_tx_bd_ring[i]) {
                        length =
                            (ugeth->ug_info->bdRingLenTx[i] *
@@ -567,7 +586,7 @@ static void dump_bds(struct ucc_geth_private *ugeth)
                        mem_disp(ugeth->p_tx_bd_ring[i], length);
                }
        }
-       for (i = 0; i < ugeth->ug_info->numQueuesRx; i++) {
+       for (i = 0; i < ucc_geth_rx_queues(ugeth->ug_info); i++) {
                if (ugeth->p_rx_bd_ring[i]) {
                        length =
                            (ugeth->ug_info->bdRingLenRx[i] *
@@ -671,32 +690,12 @@ static void dump_regs(struct ucc_geth_private *ugeth)
                in_be32(&ugeth->ug_regs->scam));
 
        if (ugeth->p_thread_data_tx) {
-               int numThreadsTxNumerical;
-               switch (ugeth->ug_info->numThreadsTx) {
-               case UCC_GETH_NUM_OF_THREADS_1:
-                       numThreadsTxNumerical = 1;
-                       break;
-               case UCC_GETH_NUM_OF_THREADS_2:
-                       numThreadsTxNumerical = 2;
-                       break;
-               case UCC_GETH_NUM_OF_THREADS_4:
-                       numThreadsTxNumerical = 4;
-                       break;
-               case UCC_GETH_NUM_OF_THREADS_6:
-                       numThreadsTxNumerical = 6;
-                       break;
-               case UCC_GETH_NUM_OF_THREADS_8:
-                       numThreadsTxNumerical = 8;
-                       break;
-               default:
-                       numThreadsTxNumerical = 0;
-                       break;
-               }
+               int count = ucc_geth_thread_count(ugeth->ug_info->numThreadsTx);
 
                pr_info("Thread data TXs:\n");
                pr_info("Base address: 0x%08x\n",
                        (u32)ugeth->p_thread_data_tx);
-               for (i = 0; i < numThreadsTxNumerical; i++) {
+               for (i = 0; i < count; i++) {
                        pr_info("Thread data TX[%d]:\n", i);
                        pr_info("Base address: 0x%08x\n",
                                (u32)&ugeth->p_thread_data_tx[i]);
@@ -705,32 +704,12 @@ static void dump_regs(struct ucc_geth_private *ugeth)
                }
        }
        if (ugeth->p_thread_data_rx) {
-               int numThreadsRxNumerical;
-               switch (ugeth->ug_info->numThreadsRx) {
-               case UCC_GETH_NUM_OF_THREADS_1:
-                       numThreadsRxNumerical = 1;
-                       break;
-               case UCC_GETH_NUM_OF_THREADS_2:
-                       numThreadsRxNumerical = 2;
-                       break;
-               case UCC_GETH_NUM_OF_THREADS_4:
-                       numThreadsRxNumerical = 4;
-                       break;
-               case UCC_GETH_NUM_OF_THREADS_6:
-                       numThreadsRxNumerical = 6;
-                       break;
-               case UCC_GETH_NUM_OF_THREADS_8:
-                       numThreadsRxNumerical = 8;
-                       break;
-               default:
-                       numThreadsRxNumerical = 0;
-                       break;
-               }
+               int count = ucc_geth_thread_count(ugeth->ug_info->numThreadsRx);
 
                pr_info("Thread data RX:\n");
                pr_info("Base address: 0x%08x\n",
                        (u32)ugeth->p_thread_data_rx);
-               for (i = 0; i < numThreadsRxNumerical; i++) {
+               for (i = 0; i < count; i++) {
                        pr_info("Thread data RX[%d]:\n", i);
                        pr_info("Base address: 0x%08x\n",
                                (u32)&ugeth->p_thread_data_rx[i]);
@@ -905,7 +884,7 @@ static void dump_regs(struct ucc_geth_private *ugeth)
        if (ugeth->p_send_q_mem_reg) {
                pr_info("Send Q memory registers:\n");
                pr_info("Base address: 0x%08x\n", (u32)ugeth->p_send_q_mem_reg);
-               for (i = 0; i < ugeth->ug_info->numQueuesTx; i++) {
+               for (i = 0; i < ucc_geth_tx_queues(ugeth->ug_info); i++) {
                        pr_info("SQQD[%d]:\n", i);
                        pr_info("Base address: 0x%08x\n",
                                (u32)&ugeth->p_send_q_mem_reg->sqqd[i]);
@@ -937,7 +916,7 @@ static void dump_regs(struct ucc_geth_private *ugeth)
                pr_info("RX IRQ coalescing tables:\n");
                pr_info("Base address: 0x%08x\n",
                        (u32)ugeth->p_rx_irq_coalescing_tbl);
-               for (i = 0; i < ugeth->ug_info->numQueuesRx; i++) {
+               for (i = 0; i < ucc_geth_rx_queues(ugeth->ug_info); i++) {
                        pr_info("RX IRQ coalescing table entry[%d]:\n", i);
                        pr_info("Base address: 0x%08x\n",
                                (u32)&ugeth->p_rx_irq_coalescing_tbl->
@@ -959,7 +938,7 @@ static void dump_regs(struct ucc_geth_private *ugeth)
        if (ugeth->p_rx_bd_qs_tbl) {
                pr_info("RX BD QS tables:\n");
                pr_info("Base address: 0x%08x\n", (u32)ugeth->p_rx_bd_qs_tbl);
-               for (i = 0; i < ugeth->ug_info->numQueuesRx; i++) {
+               for (i = 0; i < ucc_geth_rx_queues(ugeth->ug_info); i++) {
                        pr_info("RX BD QS table[%d]:\n", i);
                        pr_info("Base address: 0x%08x\n",
                                (u32)&ugeth->p_rx_bd_qs_tbl[i]);
@@ -1835,7 +1814,7 @@ static void ucc_geth_free_rx(struct ucc_geth_private *ugeth)
        ug_info = ugeth->ug_info;
        uf_info = &ug_info->uf_info;
 
-       for (i = 0; i < ugeth->ug_info->numQueuesRx; i++) {
+       for (i = 0; i < ucc_geth_rx_queues(ugeth->ug_info); i++) {
                if (ugeth->p_rx_bd_ring[i]) {
                        /* Return existing data buffers in ring */
                        bd = ugeth->p_rx_bd_ring[i];
@@ -1856,12 +1835,7 @@ static void ucc_geth_free_rx(struct ucc_geth_private *ugeth)
 
                        kfree(ugeth->rx_skbuff[i]);
 
-                       if (ugeth->ug_info->uf_info.bd_mem_part ==
-                           MEM_PART_SYSTEM)
-                               kfree((void *)ugeth->rx_bd_ring_offset[i]);
-                       else if (ugeth->ug_info->uf_info.bd_mem_part ==
-                                MEM_PART_MURAM)
-                               qe_muram_free(ugeth->rx_bd_ring_offset[i]);
+                       kfree(ugeth->p_rx_bd_ring[i]);
                        ugeth->p_rx_bd_ring[i] = NULL;
                }
        }
@@ -1880,7 +1854,7 @@ static void ucc_geth_free_tx(struct ucc_geth_private *ugeth)
        ug_info = ugeth->ug_info;
        uf_info = &ug_info->uf_info;
 
-       for (i = 0; i < ugeth->ug_info->numQueuesTx; i++) {
+       for (i = 0; i < ucc_geth_tx_queues(ugeth->ug_info); i++) {
                bd = ugeth->p_tx_bd_ring[i];
                if (!bd)
                        continue;
@@ -1898,15 +1872,8 @@ static void ucc_geth_free_tx(struct ucc_geth_private *ugeth)
 
                kfree(ugeth->tx_skbuff[i]);
 
-               if (ugeth->p_tx_bd_ring[i]) {
-                       if (ugeth->ug_info->uf_info.bd_mem_part ==
-                           MEM_PART_SYSTEM)
-                               kfree((void *)ugeth->tx_bd_ring_offset[i]);
-                       else if (ugeth->ug_info->uf_info.bd_mem_part ==
-                                MEM_PART_MURAM)
-                               qe_muram_free(ugeth->tx_bd_ring_offset[i]);
-                       ugeth->p_tx_bd_ring[i] = NULL;
-               }
+               kfree(ugeth->p_tx_bd_ring[i]);
+               ugeth->p_tx_bd_ring[i] = NULL;
        }
 
 }
@@ -1921,50 +1888,39 @@ static void ucc_geth_memclean(struct ucc_geth_private *ugeth)
                ugeth->uccf = NULL;
        }
 
-       if (ugeth->p_thread_data_tx) {
-               qe_muram_free(ugeth->thread_dat_tx_offset);
-               ugeth->p_thread_data_tx = NULL;
-       }
-       if (ugeth->p_thread_data_rx) {
-               qe_muram_free(ugeth->thread_dat_rx_offset);
-               ugeth->p_thread_data_rx = NULL;
-       }
-       if (ugeth->p_exf_glbl_param) {
-               qe_muram_free(ugeth->exf_glbl_param_offset);
-               ugeth->p_exf_glbl_param = NULL;
-       }
-       if (ugeth->p_rx_glbl_pram) {
-               qe_muram_free(ugeth->rx_glbl_pram_offset);
-               ugeth->p_rx_glbl_pram = NULL;
-       }
-       if (ugeth->p_tx_glbl_pram) {
-               qe_muram_free(ugeth->tx_glbl_pram_offset);
-               ugeth->p_tx_glbl_pram = NULL;
-       }
-       if (ugeth->p_send_q_mem_reg) {
-               qe_muram_free(ugeth->send_q_mem_reg_offset);
-               ugeth->p_send_q_mem_reg = NULL;
-       }
-       if (ugeth->p_scheduler) {
-               qe_muram_free(ugeth->scheduler_offset);
-               ugeth->p_scheduler = NULL;
-       }
-       if (ugeth->p_tx_fw_statistics_pram) {
-               qe_muram_free(ugeth->tx_fw_statistics_pram_offset);
-               ugeth->p_tx_fw_statistics_pram = NULL;
-       }
-       if (ugeth->p_rx_fw_statistics_pram) {
-               qe_muram_free(ugeth->rx_fw_statistics_pram_offset);
-               ugeth->p_rx_fw_statistics_pram = NULL;
-       }
-       if (ugeth->p_rx_irq_coalescing_tbl) {
-               qe_muram_free(ugeth->rx_irq_coalescing_tbl_offset);
-               ugeth->p_rx_irq_coalescing_tbl = NULL;
-       }
-       if (ugeth->p_rx_bd_qs_tbl) {
-               qe_muram_free(ugeth->rx_bd_qs_tbl_offset);
-               ugeth->p_rx_bd_qs_tbl = NULL;
-       }
+       qe_muram_free_addr(ugeth->p_thread_data_tx);
+       ugeth->p_thread_data_tx = NULL;
+
+       qe_muram_free_addr(ugeth->p_thread_data_rx);
+       ugeth->p_thread_data_rx = NULL;
+
+       qe_muram_free_addr(ugeth->p_exf_glbl_param);
+       ugeth->p_exf_glbl_param = NULL;
+
+       qe_muram_free_addr(ugeth->p_rx_glbl_pram);
+       ugeth->p_rx_glbl_pram = NULL;
+
+       qe_muram_free_addr(ugeth->p_tx_glbl_pram);
+       ugeth->p_tx_glbl_pram = NULL;
+
+       qe_muram_free_addr(ugeth->p_send_q_mem_reg);
+       ugeth->p_send_q_mem_reg = NULL;
+
+       qe_muram_free_addr(ugeth->p_scheduler);
+       ugeth->p_scheduler = NULL;
+
+       qe_muram_free_addr(ugeth->p_tx_fw_statistics_pram);
+       ugeth->p_tx_fw_statistics_pram = NULL;
+
+       qe_muram_free_addr(ugeth->p_rx_fw_statistics_pram);
+       ugeth->p_rx_fw_statistics_pram = NULL;
+
+       qe_muram_free_addr(ugeth->p_rx_irq_coalescing_tbl);
+       ugeth->p_rx_irq_coalescing_tbl = NULL;
+
+       qe_muram_free_addr(ugeth->p_rx_bd_qs_tbl);
+       ugeth->p_rx_bd_qs_tbl = NULL;
+
        if (ugeth->p_init_enet_param_shadow) {
                return_init_enet_entries(ugeth,
                                         &(ugeth->p_init_enet_param_shadow->
@@ -2073,15 +2029,8 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth)
        ug_info = ugeth->ug_info;
        uf_info = &ug_info->uf_info;
 
-       if (!((uf_info->bd_mem_part == MEM_PART_SYSTEM) ||
-             (uf_info->bd_mem_part == MEM_PART_MURAM))) {
-               if (netif_msg_probe(ugeth))
-                       pr_err("Bad memory partition value\n");
-               return -EINVAL;
-       }
-
        /* Rx BD lengths */
-       for (i = 0; i < ug_info->numQueuesRx; i++) {
+       for (i = 0; i < ucc_geth_rx_queues(ug_info); i++) {
                if ((ug_info->bdRingLenRx[i] < UCC_GETH_RX_BD_RING_SIZE_MIN) ||
                    (ug_info->bdRingLenRx[i] %
                     UCC_GETH_RX_BD_RING_SIZE_ALIGNMENT)) {
@@ -2092,7 +2041,7 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth)
        }
 
        /* Tx BD lengths */
-       for (i = 0; i < ug_info->numQueuesTx; i++) {
+       for (i = 0; i < ucc_geth_tx_queues(ug_info); i++) {
                if (ug_info->bdRingLenTx[i] < UCC_GETH_TX_BD_RING_SIZE_MIN) {
                        if (netif_msg_probe(ugeth))
                                pr_err("Tx BD ring length must be no smaller than 2\n");
@@ -2109,14 +2058,14 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth)
        }
 
        /* num Tx queues */
-       if (ug_info->numQueuesTx > NUM_TX_QUEUES) {
+       if (ucc_geth_tx_queues(ug_info) > NUM_TX_QUEUES) {
                if (netif_msg_probe(ugeth))
                        pr_err("number of tx queues too large\n");
                return -EINVAL;
        }
 
        /* num Rx queues */
-       if (ug_info->numQueuesRx > NUM_RX_QUEUES) {
+       if (ucc_geth_rx_queues(ug_info) > NUM_RX_QUEUES) {
                if (netif_msg_probe(ugeth))
                        pr_err("number of rx queues too large\n");
                return -EINVAL;
@@ -2124,7 +2073,7 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth)
 
        /* l2qt */
        for (i = 0; i < UCC_GETH_VLAN_PRIORITY_MAX; i++) {
-               if (ug_info->l2qt[i] >= ug_info->numQueuesRx) {
+               if (ug_info->l2qt[i] >= ucc_geth_rx_queues(ug_info)) {
                        if (netif_msg_probe(ugeth))
                                pr_err("VLAN priority table entry must not be larger than number of Rx queues\n");
                        return -EINVAL;
@@ -2133,7 +2082,7 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth)
 
        /* l3qt */
        for (i = 0; i < UCC_GETH_IP_PRIORITY_MAX; i++) {
-               if (ug_info->l3qt[i] >= ug_info->numQueuesRx) {
+               if (ug_info->l3qt[i] >= ucc_geth_rx_queues(ug_info)) {
                        if (netif_msg_probe(ugeth))
                                pr_err("IP priority table entry must not be larger than number of Rx queues\n");
                        return -EINVAL;
@@ -2156,10 +2105,10 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth)
 
        /* Generate uccm_mask for receive */
        uf_info->uccm_mask = ug_info->eventRegMask & UCCE_OTHER;/* Errors */
-       for (i = 0; i < ug_info->numQueuesRx; i++)
+       for (i = 0; i < ucc_geth_rx_queues(ug_info); i++)
                uf_info->uccm_mask |= (UCC_GETH_UCCE_RXF0 << i);
 
-       for (i = 0; i < ug_info->numQueuesTx; i++)
+       for (i = 0; i < ucc_geth_tx_queues(ug_info); i++)
                uf_info->uccm_mask |= (UCC_GETH_UCCE_TXB0 << i);
        /* Initialize the general fast UCC block. */
        if (ucc_fast_init(uf_info, &ugeth->uccf)) {
@@ -2198,53 +2147,32 @@ static int ucc_geth_alloc_tx(struct ucc_geth_private *ugeth)
        uf_info = &ug_info->uf_info;
 
        /* Allocate Tx bds */
-       for (j = 0; j < ug_info->numQueuesTx; j++) {
-               /* Allocate in multiple of
-                  UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT,
-                  according to spec */
-               length = ((ug_info->bdRingLenTx[j] * sizeof(struct qe_bd))
-                         / UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT)
-                   * UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT;
-               if ((ug_info->bdRingLenTx[j] * sizeof(struct qe_bd)) %
-                   UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT)
-                       length += UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT;
-               if (uf_info->bd_mem_part == MEM_PART_SYSTEM) {
-                       u32 align = 4;
-                       if (UCC_GETH_TX_BD_RING_ALIGNMENT > 4)
-                               align = UCC_GETH_TX_BD_RING_ALIGNMENT;
-                       ugeth->tx_bd_ring_offset[j] =
-                               (u32) kmalloc((u32) (length + align), GFP_KERNEL);
-
-                       if (ugeth->tx_bd_ring_offset[j] != 0)
-                               ugeth->p_tx_bd_ring[j] =
-                                       (u8 __iomem *)((ugeth->tx_bd_ring_offset[j] +
-                                       align) & ~(align - 1));
-               } else if (uf_info->bd_mem_part == MEM_PART_MURAM) {
-                       ugeth->tx_bd_ring_offset[j] =
-                           qe_muram_alloc(length,
-                                          UCC_GETH_TX_BD_RING_ALIGNMENT);
-                       if (!IS_ERR_VALUE(ugeth->tx_bd_ring_offset[j]))
-                               ugeth->p_tx_bd_ring[j] =
-                                   (u8 __iomem *) qe_muram_addr(ugeth->
-                                                        tx_bd_ring_offset[j]);
-               }
+       for (j = 0; j < ucc_geth_tx_queues(ug_info); j++) {
+               u32 align = max(UCC_GETH_TX_BD_RING_ALIGNMENT,
+                               UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT);
+               u32 alloc;
+
+               length = ug_info->bdRingLenTx[j] * sizeof(struct qe_bd);
+               alloc = round_up(length, align);
+               alloc = roundup_pow_of_two(alloc);
+
+               ugeth->p_tx_bd_ring[j] = kmalloc(alloc, GFP_KERNEL);
+
                if (!ugeth->p_tx_bd_ring[j]) {
                        if (netif_msg_ifup(ugeth))
                                pr_err("Can not allocate memory for Tx bd rings\n");
                        return -ENOMEM;
                }
                /* Zero unused end of bd ring, according to spec */
-               memset_io((void __iomem *)(ugeth->p_tx_bd_ring[j] +
-                      ug_info->bdRingLenTx[j] * sizeof(struct qe_bd)), 0,
-                      length - ug_info->bdRingLenTx[j] * sizeof(struct qe_bd));
+               memset(ugeth->p_tx_bd_ring[j] + length, 0, alloc - length);
        }
 
        /* Init Tx bds */
-       for (j = 0; j < ug_info->numQueuesTx; j++) {
+       for (j = 0; j < ucc_geth_tx_queues(ug_info); j++) {
                /* Setup the skbuff rings */
                ugeth->tx_skbuff[j] =
-                       kmalloc_array(ugeth->ug_info->bdRingLenTx[j],
-                                     sizeof(struct sk_buff *), GFP_KERNEL);
+                       kcalloc(ugeth->ug_info->bdRingLenTx[j],
+                               sizeof(struct sk_buff *), GFP_KERNEL);
 
                if (ugeth->tx_skbuff[j] == NULL) {
                        if (netif_msg_ifup(ugeth))
@@ -2252,9 +2180,6 @@ static int ucc_geth_alloc_tx(struct ucc_geth_private *ugeth)
                        return -ENOMEM;
                }
 
-               for (i = 0; i < ugeth->ug_info->bdRingLenTx[j]; i++)
-                       ugeth->tx_skbuff[j][i] = NULL;
-
                ugeth->skb_curtx[j] = ugeth->skb_dirtytx[j] = 0;
                bd = ugeth->confBd[j] = ugeth->txBd[j] = ugeth->p_tx_bd_ring[j];
                for (i = 0; i < ug_info->bdRingLenTx[j]; i++) {
@@ -2284,27 +2209,15 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth)
        uf_info = &ug_info->uf_info;
 
        /* Allocate Rx bds */
-       for (j = 0; j < ug_info->numQueuesRx; j++) {
+       for (j = 0; j < ucc_geth_rx_queues(ug_info); j++) {
+               u32 align = UCC_GETH_RX_BD_RING_ALIGNMENT;
+               u32 alloc;
+
                length = ug_info->bdRingLenRx[j] * sizeof(struct qe_bd);
-               if (uf_info->bd_mem_part == MEM_PART_SYSTEM) {
-                       u32 align = 4;
-                       if (UCC_GETH_RX_BD_RING_ALIGNMENT > 4)
-                               align = UCC_GETH_RX_BD_RING_ALIGNMENT;
-                       ugeth->rx_bd_ring_offset[j] =
-                               (u32) kmalloc((u32) (length + align), GFP_KERNEL);
-                       if (ugeth->rx_bd_ring_offset[j] != 0)
-                               ugeth->p_rx_bd_ring[j] =
-                                       (u8 __iomem *)((ugeth->rx_bd_ring_offset[j] +
-                                       align) & ~(align - 1));
-               } else if (uf_info->bd_mem_part == MEM_PART_MURAM) {
-                       ugeth->rx_bd_ring_offset[j] =
-                           qe_muram_alloc(length,
-                                          UCC_GETH_RX_BD_RING_ALIGNMENT);
-                       if (!IS_ERR_VALUE(ugeth->rx_bd_ring_offset[j]))
-                               ugeth->p_rx_bd_ring[j] =
-                                   (u8 __iomem *) qe_muram_addr(ugeth->
-                                                        rx_bd_ring_offset[j]);
-               }
+               alloc = round_up(length, align);
+               alloc = roundup_pow_of_two(alloc);
+
+               ugeth->p_rx_bd_ring[j] = kmalloc(alloc, GFP_KERNEL);
                if (!ugeth->p_rx_bd_ring[j]) {
                        if (netif_msg_ifup(ugeth))
                                pr_err("Can not allocate memory for Rx bd rings\n");
@@ -2313,11 +2226,11 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth)
        }
 
        /* Init Rx bds */
-       for (j = 0; j < ug_info->numQueuesRx; j++) {
+       for (j = 0; j < ucc_geth_rx_queues(ug_info); j++) {
                /* Setup the skbuff rings */
                ugeth->rx_skbuff[j] =
-                       kmalloc_array(ugeth->ug_info->bdRingLenRx[j],
-                                     sizeof(struct sk_buff *), GFP_KERNEL);
+                       kcalloc(ugeth->ug_info->bdRingLenRx[j],
+                               sizeof(struct sk_buff *), GFP_KERNEL);
 
                if (ugeth->rx_skbuff[j] == NULL) {
                        if (netif_msg_ifup(ugeth))
@@ -2325,9 +2238,6 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth)
                        return -ENOMEM;
                }
 
-               for (i = 0; i < ugeth->ug_info->bdRingLenRx[j]; i++)
-                       ugeth->rx_skbuff[j][i] = NULL;
-
                ugeth->skb_currx[j] = 0;
                bd = ugeth->rxBd[j] = ugeth->p_rx_bd_ring[j];
                for (i = 0; i < ug_info->bdRingLenRx[j]; i++) {
@@ -2359,10 +2269,10 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
        u32 init_enet_pram_offset, cecr_subblock, command;
        u32 ifstat, i, j, size, l2qt, l3qt;
        u16 temoder = UCC_GETH_TEMODER_INIT;
-       u16 test;
        u8 function_code = 0;
        u8 __iomem *endOfRing;
        u8 numThreadsRxNumerical, numThreadsTxNumerical;
+       s32 rx_glbl_pram_offset, tx_glbl_pram_offset;
 
        ugeth_vdbg("%s: IN", __func__);
        uccf = ugeth->uccf;
@@ -2371,45 +2281,15 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
        uf_regs = uccf->uf_regs;
        ug_regs = ugeth->ug_regs;
 
-       switch (ug_info->numThreadsRx) {
-       case UCC_GETH_NUM_OF_THREADS_1:
-               numThreadsRxNumerical = 1;
-               break;
-       case UCC_GETH_NUM_OF_THREADS_2:
-               numThreadsRxNumerical = 2;
-               break;
-       case UCC_GETH_NUM_OF_THREADS_4:
-               numThreadsRxNumerical = 4;
-               break;
-       case UCC_GETH_NUM_OF_THREADS_6:
-               numThreadsRxNumerical = 6;
-               break;
-       case UCC_GETH_NUM_OF_THREADS_8:
-               numThreadsRxNumerical = 8;
-               break;
-       default:
+       numThreadsRxNumerical = ucc_geth_thread_count(ug_info->numThreadsRx);
+       if (!numThreadsRxNumerical) {
                if (netif_msg_ifup(ugeth))
                        pr_err("Bad number of Rx threads value\n");
                return -EINVAL;
        }
 
-       switch (ug_info->numThreadsTx) {
-       case UCC_GETH_NUM_OF_THREADS_1:
-               numThreadsTxNumerical = 1;
-               break;
-       case UCC_GETH_NUM_OF_THREADS_2:
-               numThreadsTxNumerical = 2;
-               break;
-       case UCC_GETH_NUM_OF_THREADS_4:
-               numThreadsTxNumerical = 4;
-               break;
-       case UCC_GETH_NUM_OF_THREADS_6:
-               numThreadsTxNumerical = 6;
-               break;
-       case UCC_GETH_NUM_OF_THREADS_8:
-               numThreadsTxNumerical = 8;
-               break;
-       default:
+       numThreadsTxNumerical = ucc_geth_thread_count(ug_info->numThreadsTx);
+       if (!numThreadsTxNumerical) {
                if (netif_msg_ifup(ugeth))
                        pr_err("Bad number of Tx threads value\n");
                return -EINVAL;
@@ -2507,20 +2387,15 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
         */
        /* Tx global PRAM */
        /* Allocate global tx parameter RAM page */
-       ugeth->tx_glbl_pram_offset =
+       tx_glbl_pram_offset =
            qe_muram_alloc(sizeof(struct ucc_geth_tx_global_pram),
                           UCC_GETH_TX_GLOBAL_PRAM_ALIGNMENT);
-       if (IS_ERR_VALUE(ugeth->tx_glbl_pram_offset)) {
+       if (tx_glbl_pram_offset < 0) {
                if (netif_msg_ifup(ugeth))
                        pr_err("Can not allocate DPRAM memory for p_tx_glbl_pram\n");
                return -ENOMEM;
        }
-       ugeth->p_tx_glbl_pram =
-           (struct ucc_geth_tx_global_pram __iomem *) qe_muram_addr(ugeth->
-                                                       tx_glbl_pram_offset);
-       /* Zero out p_tx_glbl_pram */
-       memset_io((void __iomem *)ugeth->p_tx_glbl_pram, 0, sizeof(struct ucc_geth_tx_global_pram));
-
+       ugeth->p_tx_glbl_pram = qe_muram_addr(tx_glbl_pram_offset);
        /* Fill global PRAM */
 
        /* TQPTR */
@@ -2554,7 +2429,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
        /* SQPTR */
        /* Size varies with number of Tx queues */
        ugeth->send_q_mem_reg_offset =
-           qe_muram_alloc(ug_info->numQueuesTx *
+           qe_muram_alloc(ucc_geth_tx_queues(ug_info) *
                           sizeof(struct ucc_geth_send_queue_qd),
                           UCC_GETH_SEND_QUEUE_QUEUE_DESCRIPTOR_ALIGNMENT);
        if (IS_ERR_VALUE(ugeth->send_q_mem_reg_offset)) {
@@ -2570,29 +2445,20 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
 
        /* Setup the table */
        /* Assume BD rings are already established */
-       for (i = 0; i < ug_info->numQueuesTx; i++) {
+       for (i = 0; i < ucc_geth_tx_queues(ug_info); i++) {
                endOfRing =
                    ugeth->p_tx_bd_ring[i] + (ug_info->bdRingLenTx[i] -
                                              1) * sizeof(struct qe_bd);
-               if (ugeth->ug_info->uf_info.bd_mem_part == MEM_PART_SYSTEM) {
-                       out_be32(&ugeth->p_send_q_mem_reg->sqqd[i].bd_ring_base,
-                                (u32) virt_to_phys(ugeth->p_tx_bd_ring[i]));
-                       out_be32(&ugeth->p_send_q_mem_reg->sqqd[i].
-                                last_bd_completed_address,
-                                (u32) virt_to_phys(endOfRing));
-               } else if (ugeth->ug_info->uf_info.bd_mem_part ==
-                          MEM_PART_MURAM) {
-                       out_be32(&ugeth->p_send_q_mem_reg->sqqd[i].bd_ring_base,
-                                (u32)qe_muram_dma(ugeth->p_tx_bd_ring[i]));
-                       out_be32(&ugeth->p_send_q_mem_reg->sqqd[i].
-                                last_bd_completed_address,
-                                (u32)qe_muram_dma(endOfRing));
-               }
+               out_be32(&ugeth->p_send_q_mem_reg->sqqd[i].bd_ring_base,
+                        (u32) virt_to_phys(ugeth->p_tx_bd_ring[i]));
+               out_be32(&ugeth->p_send_q_mem_reg->sqqd[i].
+                        last_bd_completed_address,
+                        (u32) virt_to_phys(endOfRing));
        }
 
        /* schedulerbasepointer */
 
-       if (ug_info->numQueuesTx > 1) {
+       if (ucc_geth_tx_queues(ug_info) > 1) {
        /* scheduler exists only if more than 1 tx queue */
                ugeth->scheduler_offset =
                    qe_muram_alloc(sizeof(struct ucc_geth_scheduler),
@@ -2608,8 +2474,6 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
                                                           scheduler_offset);
                out_be32(&ugeth->p_tx_glbl_pram->schedulerbasepointer,
                         ugeth->scheduler_offset);
-               /* Zero out p_scheduler */
-               memset_io((void __iomem *)ugeth->p_scheduler, 0, sizeof(struct ucc_geth_scheduler));
 
                /* Set values in scheduler */
                out_be32(&ugeth->p_scheduler->mblinterval,
@@ -2652,23 +2516,18 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
                ugeth->p_tx_fw_statistics_pram =
                    (struct ucc_geth_tx_firmware_statistics_pram __iomem *)
                    qe_muram_addr(ugeth->tx_fw_statistics_pram_offset);
-               /* Zero out p_tx_fw_statistics_pram */
-               memset_io((void __iomem *)ugeth->p_tx_fw_statistics_pram,
-                      0, sizeof(struct ucc_geth_tx_firmware_statistics_pram));
        }
 
        /* temoder */
        /* Already has speed set */
 
-       if (ug_info->numQueuesTx > 1)
+       if (ucc_geth_tx_queues(ug_info) > 1)
                temoder |= TEMODER_SCHEDULER_ENABLE;
        if (ug_info->ipCheckSumGenerate)
                temoder |= TEMODER_IP_CHECKSUM_GENERATE;
-       temoder |= ((ug_info->numQueuesTx - 1) << TEMODER_NUM_OF_QUEUES_SHIFT);
+       temoder |= ((ucc_geth_tx_queues(ug_info) - 1) << TEMODER_NUM_OF_QUEUES_SHIFT);
        out_be16(&ugeth->p_tx_glbl_pram->temoder, temoder);
 
-       test = in_be16(&ugeth->p_tx_glbl_pram->temoder);
-
        /* Function code register value to be used later */
        function_code = UCC_BMR_BO_BE | UCC_BMR_GBL;
        /* Required for QE */
@@ -2678,20 +2537,15 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
 
        /* Rx global PRAM */
        /* Allocate global rx parameter RAM page */
-       ugeth->rx_glbl_pram_offset =
+       rx_glbl_pram_offset =
            qe_muram_alloc(sizeof(struct ucc_geth_rx_global_pram),
                           UCC_GETH_RX_GLOBAL_PRAM_ALIGNMENT);
-       if (IS_ERR_VALUE(ugeth->rx_glbl_pram_offset)) {
+       if (rx_glbl_pram_offset < 0) {
                if (netif_msg_ifup(ugeth))
                        pr_err("Can not allocate DPRAM memory for p_rx_glbl_pram\n");
                return -ENOMEM;
        }
-       ugeth->p_rx_glbl_pram =
-           (struct ucc_geth_rx_global_pram __iomem *) qe_muram_addr(ugeth->
-                                                       rx_glbl_pram_offset);
-       /* Zero out p_rx_glbl_pram */
-       memset_io((void __iomem *)ugeth->p_rx_glbl_pram, 0, sizeof(struct ucc_geth_rx_global_pram));
-
+       ugeth->p_rx_glbl_pram = qe_muram_addr(rx_glbl_pram_offset);
        /* Fill global PRAM */
 
        /* RQPTR */
@@ -2729,16 +2583,13 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
                ugeth->p_rx_fw_statistics_pram =
                    (struct ucc_geth_rx_firmware_statistics_pram __iomem *)
                    qe_muram_addr(ugeth->rx_fw_statistics_pram_offset);
-               /* Zero out p_rx_fw_statistics_pram */
-               memset_io((void __iomem *)ugeth->p_rx_fw_statistics_pram, 0,
-                      sizeof(struct ucc_geth_rx_firmware_statistics_pram));
        }
 
        /* intCoalescingPtr */
 
        /* Size varies with number of Rx queues */
        ugeth->rx_irq_coalescing_tbl_offset =
-           qe_muram_alloc(ug_info->numQueuesRx *
+           qe_muram_alloc(ucc_geth_rx_queues(ug_info) *
                           sizeof(struct ucc_geth_rx_interrupt_coalescing_entry)
                           + 4, UCC_GETH_RX_INTERRUPT_COALESCING_ALIGNMENT);
        if (IS_ERR_VALUE(ugeth->rx_irq_coalescing_tbl_offset)) {
@@ -2754,7 +2605,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
                 ugeth->rx_irq_coalescing_tbl_offset);
 
        /* Fill interrupt coalescing table */
-       for (i = 0; i < ug_info->numQueuesRx; i++) {
+       for (i = 0; i < ucc_geth_rx_queues(ug_info); i++) {
                out_be32(&ugeth->p_rx_irq_coalescing_tbl->coalescingentry[i].
                         interruptcoalescingmaxvalue,
                         ug_info->interruptcoalescingmaxvalue[i]);
@@ -2803,7 +2654,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
        /* RBDQPTR */
        /* Size varies with number of Rx queues */
        ugeth->rx_bd_qs_tbl_offset =
-           qe_muram_alloc(ug_info->numQueuesRx *
+           qe_muram_alloc(ucc_geth_rx_queues(ug_info) *
                           (sizeof(struct ucc_geth_rx_bd_queues_entry) +
                            sizeof(struct ucc_geth_rx_prefetched_bds)),
                           UCC_GETH_RX_BD_QUEUES_ALIGNMENT);
@@ -2817,23 +2668,12 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
            (struct ucc_geth_rx_bd_queues_entry __iomem *) qe_muram_addr(ugeth->
                                    rx_bd_qs_tbl_offset);
        out_be32(&ugeth->p_rx_glbl_pram->rbdqptr, ugeth->rx_bd_qs_tbl_offset);
-       /* Zero out p_rx_bd_qs_tbl */
-       memset_io((void __iomem *)ugeth->p_rx_bd_qs_tbl,
-              0,
-              ug_info->numQueuesRx * (sizeof(struct ucc_geth_rx_bd_queues_entry) +
-                                      sizeof(struct ucc_geth_rx_prefetched_bds)));
 
        /* Setup the table */
        /* Assume BD rings are already established */
-       for (i = 0; i < ug_info->numQueuesRx; i++) {
-               if (ugeth->ug_info->uf_info.bd_mem_part == MEM_PART_SYSTEM) {
-                       out_be32(&ugeth->p_rx_bd_qs_tbl[i].externalbdbaseptr,
-                                (u32) virt_to_phys(ugeth->p_rx_bd_ring[i]));
-               } else if (ugeth->ug_info->uf_info.bd_mem_part ==
-                          MEM_PART_MURAM) {
-                       out_be32(&ugeth->p_rx_bd_qs_tbl[i].externalbdbaseptr,
-                                (u32)qe_muram_dma(ugeth->p_rx_bd_ring[i]));
-               }
+       for (i = 0; i < ucc_geth_rx_queues(ug_info); i++) {
+               out_be32(&ugeth->p_rx_bd_qs_tbl[i].externalbdbaseptr,
+                        (u32) virt_to_phys(ugeth->p_rx_bd_ring[i]));
                /* rest of fields handled by QE */
        }
 
@@ -2854,7 +2694,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
            ug_info->
            vlanOperationNonTagged << REMODER_VLAN_OPERATION_NON_TAGGED_SHIFT;
        remoder |= ug_info->rxQoSMode << REMODER_RX_QOS_MODE_SHIFT;
-       remoder |= ((ug_info->numQueuesRx - 1) << REMODER_NUM_OF_QUEUES_SHIFT);
+       remoder |= ((ucc_geth_rx_queues(ug_info) - 1) << REMODER_NUM_OF_QUEUES_SHIFT);
        if (ug_info->ipCheckSumCheck)
                remoder |= REMODER_IP_CHECKSUM_CHECK;
        if (ug_info->ipAddressAlignment)
@@ -2937,14 +2777,11 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
         * allocated resources can be released when the channel is freed.
         */
        if (!(ugeth->p_init_enet_param_shadow =
-             kmalloc(sizeof(struct ucc_geth_init_pram), GFP_KERNEL))) {
+             kzalloc(sizeof(struct ucc_geth_init_pram), GFP_KERNEL))) {
                if (netif_msg_ifup(ugeth))
                        pr_err("Can not allocate memory for p_UccInitEnetParamShadows\n");
                return -ENOMEM;
        }
-       /* Zero out *p_init_enet_param_shadow */
-       memset((char *)ugeth->p_init_enet_param_shadow,
-              0, sizeof(struct ucc_geth_init_pram));
 
        /* Fill shadow InitEnet command parameter structure */
 
@@ -2964,7 +2801,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
            ((u32) ug_info->numThreadsTx) << ENET_INIT_PARAM_TGF_SHIFT;
 
        ugeth->p_init_enet_param_shadow->rgftgfrxglobal |=
-           ugeth->rx_glbl_pram_offset | ug_info->riscRx;
+           rx_glbl_pram_offset | ug_info->riscRx;
        if ((ug_info->largestexternallookupkeysize !=
             QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_NONE) &&
            (ug_info->largestexternallookupkeysize !=
@@ -3002,7 +2839,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
        }
 
        ugeth->p_init_enet_param_shadow->txglobal =
-           ugeth->tx_glbl_pram_offset | ug_info->riscTx;
+           tx_glbl_pram_offset | ug_info->riscTx;
        if ((ret_val =
             fill_init_enet_entries(ugeth,
                                    &(ugeth->p_init_enet_param_shadow->
@@ -3016,7 +2853,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
        }
 
        /* Load Rx bds with buffers */
-       for (i = 0; i < ug_info->numQueuesRx; i++) {
+       for (i = 0; i < ucc_geth_rx_queues(ug_info); i++) {
                if ((ret_val = rx_bd_buffer_set(ugeth, (u8) i)) != 0) {
                        if (netif_msg_ifup(ugeth))
                                pr_err("Can not fill Rx bds with buffers\n");
@@ -3287,12 +3124,12 @@ static int ucc_geth_poll(struct napi_struct *napi, int budget)
 
        /* Tx event processing */
        spin_lock(&ugeth->lock);
-       for (i = 0; i < ug_info->numQueuesTx; i++)
+       for (i = 0; i < ucc_geth_tx_queues(ug_info); i++)
                ucc_geth_tx(ugeth->ndev, i);
        spin_unlock(&ugeth->lock);
 
        howmany = 0;
-       for (i = 0; i < ug_info->numQueuesRx; i++)
+       for (i = 0; i < ucc_geth_rx_queues(ug_info); i++)
                howmany += ucc_geth_rx(ugeth, i, budget - howmany);
 
        if (howmany < budget) {
@@ -3685,6 +3522,36 @@ static const struct net_device_ops ucc_geth_netdev_ops = {
 #endif
 };
 
+static int ucc_geth_parse_clock(struct device_node *np, const char *which,
+                               enum qe_clock *out)
+{
+       const char *sprop;
+       char buf[24];
+
+       snprintf(buf, sizeof(buf), "%s-clock-name", which);
+       sprop = of_get_property(np, buf, NULL);
+       if (sprop) {
+               *out = qe_clock_source(sprop);
+       } else {
+               u32 val;
+
+               snprintf(buf, sizeof(buf), "%s-clock", which);
+               if (of_property_read_u32(np, buf, &val)) {
+                       /* If both *-clock-name and *-clock are missing,
+                        * we want to tell people to use *-clock-name.
+                        */
+                       pr_err("missing %s-clock-name property\n", which);
+                       return -EINVAL;
+               }
+               *out = val;
+       }
+       if (*out < QE_CLK_NONE || *out > QE_CLK24) {
+               pr_err("invalid %s property\n", buf);
+               return -EINVAL;
+       }
+       return 0;
+}
+
 static int ucc_geth_probe(struct platform_device* ofdev)
 {
        struct device *device = &ofdev->dev;
@@ -3695,7 +3562,6 @@ static int ucc_geth_probe(struct platform_device* ofdev)
        struct resource res;
        int err, ucc_num, max_speed = 0;
        const unsigned int *prop;
-       const char *sprop;
        const void *mac_addr;
        phy_interface_t phy_interface;
        static const int enet_to_speed[] = {
@@ -3725,62 +3591,23 @@ static int ucc_geth_probe(struct platform_device* ofdev)
        if ((ucc_num < 0) || (ucc_num > 7))
                return -ENODEV;
 
-       ug_info = &ugeth_info[ucc_num];
-       if (ug_info == NULL) {
-               if (netif_msg_probe(&debug))
-                       pr_err("[%d] Missing additional data!\n", ucc_num);
-               return -ENODEV;
-       }
+       ug_info = kmalloc(sizeof(*ug_info), GFP_KERNEL);
+       if (ug_info == NULL)
+               return -ENOMEM;
+       memcpy(ug_info, &ugeth_primary_info, sizeof(*ug_info));
 
        ug_info->uf_info.ucc_num = ucc_num;
 
-       sprop = of_get_property(np, "rx-clock-name", NULL);
-       if (sprop) {
-               ug_info->uf_info.rx_clock = qe_clock_source(sprop);
-               if ((ug_info->uf_info.rx_clock < QE_CLK_NONE) ||
-                   (ug_info->uf_info.rx_clock > QE_CLK24)) {
-                       pr_err("invalid rx-clock-name property\n");
-                       return -EINVAL;
-               }
-       } else {
-               prop = of_get_property(np, "rx-clock", NULL);
-               if (!prop) {
-                       /* If both rx-clock-name and rx-clock are missing,
-                          we want to tell people to use rx-clock-name. */
-                       pr_err("missing rx-clock-name property\n");
-                       return -EINVAL;
-               }
-               if ((*prop < QE_CLK_NONE) || (*prop > QE_CLK24)) {
-                       pr_err("invalid rx-clock property\n");
-                       return -EINVAL;
-               }
-               ug_info->uf_info.rx_clock = *prop;
-       }
-
-       sprop = of_get_property(np, "tx-clock-name", NULL);
-       if (sprop) {
-               ug_info->uf_info.tx_clock = qe_clock_source(sprop);
-               if ((ug_info->uf_info.tx_clock < QE_CLK_NONE) ||
-                   (ug_info->uf_info.tx_clock > QE_CLK24)) {
-                       pr_err("invalid tx-clock-name property\n");
-                       return -EINVAL;
-               }
-       } else {
-               prop = of_get_property(np, "tx-clock", NULL);
-               if (!prop) {
-                       pr_err("missing tx-clock-name property\n");
-                       return -EINVAL;
-               }
-               if ((*prop < QE_CLK_NONE) || (*prop > QE_CLK24)) {
-                       pr_err("invalid tx-clock property\n");
-                       return -EINVAL;
-               }
-               ug_info->uf_info.tx_clock = *prop;
-       }
+       err = ucc_geth_parse_clock(np, "rx", &ug_info->uf_info.rx_clock);
+       if (err)
+               goto err_free_info;
+       err = ucc_geth_parse_clock(np, "tx", &ug_info->uf_info.tx_clock);
+       if (err)
+               goto err_free_info;
 
        err = of_address_to_resource(np, 0, &res);
        if (err)
-               return -EINVAL;
+               goto err_free_info;
 
        ug_info->uf_info.regs = res.start;
        ug_info->uf_info.irq = irq_of_parse_and_map(np, 0);
@@ -3793,7 +3620,7 @@ static int ucc_geth_probe(struct platform_device* ofdev)
                 */
                err = of_phy_register_fixed_link(np);
                if (err)
-                       return err;
+                       goto err_free_info;
                ug_info->phy_node = of_node_get(np);
        }
 
@@ -3924,6 +3751,8 @@ err_deregister_fixed_link:
                of_phy_deregister_fixed_link(np);
        of_node_put(ug_info->tbi_node);
        of_node_put(ug_info->phy_node);
+err_free_info:
+       kfree(ug_info);
 
        return err;
 }
@@ -3940,6 +3769,7 @@ static int ucc_geth_remove(struct platform_device* ofdev)
                of_phy_deregister_fixed_link(np);
        of_node_put(ugeth->ug_info->tbi_node);
        of_node_put(ugeth->ug_info->phy_node);
+       kfree(ugeth->ug_info);
        free_netdev(dev);
 
        return 0;
@@ -3968,17 +3798,10 @@ static struct platform_driver ucc_geth_driver = {
 
 static int __init ucc_geth_init(void)
 {
-       int i, ret;
-
        if (netif_msg_drv(&debug))
                pr_info(DRV_DESC "\n");
-       for (i = 0; i < 8; i++)
-               memcpy(&(ugeth_info[i]), &ugeth_primary_info,
-                      sizeof(ugeth_primary_info));
-
-       ret = platform_driver_register(&ucc_geth_driver);
 
-       return ret;
+       return platform_driver_register(&ucc_geth_driver);
 }
 
 static void __exit ucc_geth_exit(void)
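
The memclean and startup rewrites above lean on the QE MURAM calling convention established elsewhere in this series: qe_muram_alloc() returns an s32 offset (negative on failure), qe_muram_addr() converts the offset to an __iomem pointer, and qe_muram_free_addr() frees by address and tolerates NULL, which is why the per-pointer checks and the explicit memset_io() zeroing could be dropped (the latter assumes the allocator now hands back zeroed MURAM, as the removed memsets imply). A minimal usage sketch, mirroring the tx_glbl_pram handling and not taken from the driver:

	/* Illustrative only. */
	static int example_alloc_tx_pram(struct ucc_geth_private *ugeth)
	{
		s32 off;

		off = qe_muram_alloc(sizeof(struct ucc_geth_tx_global_pram),
				     UCC_GETH_TX_GLOBAL_PRAM_ALIGNMENT);
		if (off < 0)
			return -ENOMEM;		/* negative offset == allocation failure */

		/* offset -> __iomem pointer; assumed to arrive already zeroed */
		ugeth->p_tx_glbl_pram = qe_muram_addr(off);
		return 0;
	}

Teardown is then simply qe_muram_free_addr(ugeth->p_tx_glbl_pram), safe even if the allocation never happened.
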
index 11d4bf5..4294ed0 100644 (file)
@@ -1076,8 +1076,6 @@ struct ucc_geth_tad_params {
 /* GETH protocol initialization structure */
 struct ucc_geth_info {
        struct ucc_fast_info uf_info;
-       u8 numQueuesTx;
-       u8 numQueuesRx;
        int ipCheckSumCheck;
        int ipCheckSumGenerate;
        int rxExtendedFiltering;
@@ -1165,9 +1163,7 @@ struct ucc_geth_private {
        struct ucc_geth_exf_global_pram __iomem *p_exf_glbl_param;
        u32 exf_glbl_param_offset;
        struct ucc_geth_rx_global_pram __iomem *p_rx_glbl_pram;
-       u32 rx_glbl_pram_offset;
        struct ucc_geth_tx_global_pram __iomem *p_tx_glbl_pram;
-       u32 tx_glbl_pram_offset;
        struct ucc_geth_send_queue_mem_region __iomem *p_send_q_mem_reg;
        u32 send_q_mem_reg_offset;
        struct ucc_geth_thread_data_tx __iomem *p_thread_data_tx;
@@ -1185,9 +1181,7 @@ struct ucc_geth_private {
        struct ucc_geth_rx_bd_queues_entry __iomem *p_rx_bd_qs_tbl;
        u32 rx_bd_qs_tbl_offset;
        u8 __iomem *p_tx_bd_ring[NUM_TX_QUEUES];
-       u32 tx_bd_ring_offset[NUM_TX_QUEUES];
        u8 __iomem *p_rx_bd_ring[NUM_RX_QUEUES];
-       u32 rx_bd_ring_offset[NUM_RX_QUEUES];
        u8 __iomem *confBd[NUM_TX_QUEUES];
        u8 __iomem *txBd[NUM_TX_QUEUES];
        u8 __iomem *rxBd[NUM_RX_QUEUES];
index 405e490..5120806 100644 (file)
@@ -1070,7 +1070,7 @@ static bool hns3_check_hw_tx_csum(struct sk_buff *skb)
         * HW checksum of the non-IP packets and GSO packets is handled at
         * different place in the following code
         */
-       if (skb->csum_not_inet || skb_is_gso(skb) ||
+       if (skb_csum_is_sctp(skb) || skb_is_gso(skb) ||
            !test_bit(HNS3_NIC_STATE_HW_TX_CSUM_ENABLE, &priv->state))
                return false;
 
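
skb_csum_is_sctp() is a new skbuff helper from the same development cycle; it is expected to be a thin wrapper along the following lines, so the hns3 hunk above is a readability change rather than a behavioural one:

	/* Expected shape of the helper (see include/linux/skbuff.h). */
	static inline bool skb_csum_is_sctp(struct sk_buff *skb)
	{
		return skb->csum_not_inet;
	}
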
index 41815b6..7fe15a3 100644 (file)
@@ -94,7 +94,6 @@ config MVPP2
 
 config MVPP2_PTP
        bool "Marvell Armada 8K Enable PTP support"
-       depends on NETWORK_PHY_TIMESTAMPING
        depends on (PTP_1588_CLOCK = y && MVPP2 = y) || \
                   (PTP_1588_CLOCK && MVPP2 = m)
 
index f919283..89e93eb 100644 (file)
@@ -717,6 +717,8 @@ struct nix_rss_flowkey_cfg {
 #define NIX_FLOW_KEY_TYPE_INNR_ETH_DMAC BIT(17)
 #define NIX_FLOW_KEY_TYPE_VLAN         BIT(20)
 #define NIX_FLOW_KEY_TYPE_IPV4_PROTO   BIT(21)
+#define NIX_FLOW_KEY_TYPE_AH           BIT(22)
+#define NIX_FLOW_KEY_TYPE_ESP          BIT(23)
        u32     flowkey_cfg; /* Flowkey types selected */
        u8      group;       /* RSS context or group */
 };
index bc0e411..10a98bc 100644 (file)
@@ -52,6 +52,650 @@ static bool rvu_common_request_irq(struct rvu *rvu, int offset,
        return rvu->irq_allocated[offset];
 }
 
+static void rvu_nix_intr_work(struct work_struct *work)
+{
+       struct rvu_nix_health_reporters *rvu_nix_health_reporter;
+
+       rvu_nix_health_reporter = container_of(work, struct rvu_nix_health_reporters, intr_work);
+       devlink_health_report(rvu_nix_health_reporter->rvu_hw_nix_intr_reporter,
+                             "NIX_AF_RVU Error",
+                             rvu_nix_health_reporter->nix_event_ctx);
+}
+
+static irqreturn_t rvu_nix_af_rvu_intr_handler(int irq, void *rvu_irq)
+{
+       struct rvu_nix_event_ctx *nix_event_context;
+       struct rvu_devlink *rvu_dl = rvu_irq;
+       struct rvu *rvu;
+       int blkaddr;
+       u64 intr;
+
+       rvu = rvu_dl->rvu;
+       blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0);
+       if (blkaddr < 0)
+               return IRQ_NONE;
+
+       nix_event_context = rvu_dl->rvu_nix_health_reporter->nix_event_ctx;
+       intr = rvu_read64(rvu, blkaddr, NIX_AF_RVU_INT);
+       nix_event_context->nix_af_rvu_int = intr;
+
+       /* Clear interrupts */
+       rvu_write64(rvu, blkaddr, NIX_AF_RVU_INT, intr);
+       rvu_write64(rvu, blkaddr, NIX_AF_RVU_INT_ENA_W1C, ~0ULL);
+       queue_work(rvu_dl->devlink_wq, &rvu_dl->rvu_nix_health_reporter->intr_work);
+
+       return IRQ_HANDLED;
+}
+
+static void rvu_nix_gen_work(struct work_struct *work)
+{
+       struct rvu_nix_health_reporters *rvu_nix_health_reporter;
+
+       rvu_nix_health_reporter = container_of(work, struct rvu_nix_health_reporters, gen_work);
+       devlink_health_report(rvu_nix_health_reporter->rvu_hw_nix_gen_reporter,
+                             "NIX_AF_GEN Error",
+                             rvu_nix_health_reporter->nix_event_ctx);
+}
+
+static irqreturn_t rvu_nix_af_rvu_gen_handler(int irq, void *rvu_irq)
+{
+       struct rvu_nix_event_ctx *nix_event_context;
+       struct rvu_devlink *rvu_dl = rvu_irq;
+       struct rvu *rvu;
+       int blkaddr;
+       u64 intr;
+
+       rvu = rvu_dl->rvu;
+       blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0);
+       if (blkaddr < 0)
+               return IRQ_NONE;
+
+       nix_event_context = rvu_dl->rvu_nix_health_reporter->nix_event_ctx;
+       intr = rvu_read64(rvu, blkaddr, NIX_AF_GEN_INT);
+       nix_event_context->nix_af_rvu_gen = intr;
+
+       /* Clear interrupts */
+       rvu_write64(rvu, blkaddr, NIX_AF_GEN_INT, intr);
+       rvu_write64(rvu, blkaddr, NIX_AF_GEN_INT_ENA_W1C, ~0ULL);
+       queue_work(rvu_dl->devlink_wq, &rvu_dl->rvu_nix_health_reporter->gen_work);
+
+       return IRQ_HANDLED;
+}
+
+static void rvu_nix_err_work(struct work_struct *work)
+{
+       struct rvu_nix_health_reporters *rvu_nix_health_reporter;
+
+       rvu_nix_health_reporter = container_of(work, struct rvu_nix_health_reporters, err_work);
+       devlink_health_report(rvu_nix_health_reporter->rvu_hw_nix_err_reporter,
+                             "NIX_AF_ERR Error",
+                             rvu_nix_health_reporter->nix_event_ctx);
+}
+
+static irqreturn_t rvu_nix_af_rvu_err_handler(int irq, void *rvu_irq)
+{
+       struct rvu_nix_event_ctx *nix_event_context;
+       struct rvu_devlink *rvu_dl = rvu_irq;
+       struct rvu *rvu;
+       int blkaddr;
+       u64 intr;
+
+       rvu = rvu_dl->rvu;
+       blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0);
+       if (blkaddr < 0)
+               return IRQ_NONE;
+
+       nix_event_context = rvu_dl->rvu_nix_health_reporter->nix_event_ctx;
+       intr = rvu_read64(rvu, blkaddr, NIX_AF_ERR_INT);
+       nix_event_context->nix_af_rvu_err = intr;
+
+       /* Clear interrupts */
+       rvu_write64(rvu, blkaddr, NIX_AF_ERR_INT, intr);
+       rvu_write64(rvu, blkaddr, NIX_AF_ERR_INT_ENA_W1C, ~0ULL);
+       queue_work(rvu_dl->devlink_wq, &rvu_dl->rvu_nix_health_reporter->err_work);
+
+       return IRQ_HANDLED;
+}
+
+static void rvu_nix_ras_work(struct work_struct *work)
+{
+       struct rvu_nix_health_reporters *rvu_nix_health_reporter;
+
+       rvu_nix_health_reporter = container_of(work, struct rvu_nix_health_reporters, ras_work);
+       devlink_health_report(rvu_nix_health_reporter->rvu_hw_nix_ras_reporter,
+                             "NIX_AF_RAS Error",
+                             rvu_nix_health_reporter->nix_event_ctx);
+}
+
+static irqreturn_t rvu_nix_af_rvu_ras_handler(int irq, void *rvu_irq)
+{
+       struct rvu_nix_event_ctx *nix_event_context;
+       struct rvu_devlink *rvu_dl = rvu_irq;
+       struct rvu *rvu;
+       int blkaddr;
+       u64 intr;
+
+       rvu = rvu_dl->rvu;
+       blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0);
+       if (blkaddr < 0)
+               return IRQ_NONE;
+
+       nix_event_context = rvu_dl->rvu_nix_health_reporter->nix_event_ctx;
+       intr = rvu_read64(rvu, blkaddr, NIX_AF_RAS);
+       nix_event_context->nix_af_rvu_ras = intr;
+
+       /* Clear interrupts */
+       rvu_write64(rvu, blkaddr, NIX_AF_RAS, intr);
+       rvu_write64(rvu, blkaddr, NIX_AF_RAS_ENA_W1C, ~0ULL);
+       queue_work(rvu_dl->devlink_wq, &rvu_dl->rvu_nix_health_reporter->ras_work);
+
+       return IRQ_HANDLED;
+}
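
All four NIX reporters above follow the same top-half/bottom-half split: the hard IRQ handler latches the cause register into the event context, acknowledges and masks it, and defers the actual devlink_health_report() call to a work item running in process context. A condensed sketch of that shape, where the foo_* names and the register accessors are placeholders rather than octeontx2 code:

	#include <linux/interrupt.h>
	#include <linux/workqueue.h>
	#include <net/devlink.h>

	struct foo_health {
		struct devlink_health_reporter *reporter;
		struct work_struct work;
		u64 last_cause;			/* event context handed to the reporter */
	};

	static void foo_health_work(struct work_struct *work)
	{
		struct foo_health *fh = container_of(work, struct foo_health, work);

		/* Runs in process context, where devlink may sleep. */
		devlink_health_report(fh->reporter, "FOO error", &fh->last_cause);
	}

	static irqreturn_t foo_intr_handler(int irq, void *data)
	{
		struct foo_health *fh = data;
		u64 cause = foo_read_cause(fh);	/* placeholder: read cause register */

		fh->last_cause = cause;
		foo_ack_and_mask(fh, cause);	/* placeholder: W1C ack + mask source */
		schedule_work(&fh->work);	/* INIT_WORK() done at reporter setup */

		return IRQ_HANDLED;
	}
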
+
+static void rvu_nix_unregister_interrupts(struct rvu *rvu)
+{
+       struct rvu_devlink *rvu_dl = rvu->rvu_dl;
+       int offs, i, blkaddr;
+
+       blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0);
+       if (blkaddr < 0)
+               return;
+
+       offs = rvu_read64(rvu, blkaddr, NIX_PRIV_AF_INT_CFG) & 0x3ff;
+       if (!offs)
+               return;
+
+       rvu_write64(rvu, blkaddr, NIX_AF_RVU_INT_ENA_W1C, ~0ULL);
+       rvu_write64(rvu, blkaddr, NIX_AF_GEN_INT_ENA_W1C, ~0ULL);
+       rvu_write64(rvu, blkaddr, NIX_AF_ERR_INT_ENA_W1C, ~0ULL);
+       rvu_write64(rvu, blkaddr, NIX_AF_RAS_ENA_W1C, ~0ULL);
+
+       if (rvu->irq_allocated[offs + NIX_AF_INT_VEC_RVU]) {
+               free_irq(pci_irq_vector(rvu->pdev, offs + NIX_AF_INT_VEC_RVU),
+                        rvu_dl);
+               rvu->irq_allocated[offs + NIX_AF_INT_VEC_RVU] = false;
+       }
+
+       for (i = NIX_AF_INT_VEC_AF_ERR; i < NIX_AF_INT_VEC_CNT; i++)
+               if (rvu->irq_allocated[offs + i]) {
+                       free_irq(pci_irq_vector(rvu->pdev, offs + i), rvu_dl);
+                       rvu->irq_allocated[offs + i] = false;
+               }
+}
+
+static int rvu_nix_register_interrupts(struct rvu *rvu)
+{
+       int blkaddr, base;
+       bool rc;
+
+       blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0);
+       if (blkaddr < 0)
+               return blkaddr;
+
+       /* Get NIX AF MSIX vectors offset. */
+       base = rvu_read64(rvu, blkaddr, NIX_PRIV_AF_INT_CFG) & 0x3ff;
+       if (!base) {
+               dev_warn(rvu->dev,
+                        "Failed to get NIX%d NIX_AF_INT vector offsets\n",
+                        blkaddr - BLKADDR_NIX0);
+               return 0;
+       }
+       /* Register and enable NIX_AF_RVU_INT interrupt */
+       rc = rvu_common_request_irq(rvu, base +  NIX_AF_INT_VEC_RVU,
+                                   "NIX_AF_RVU_INT",
+                                   rvu_nix_af_rvu_intr_handler);
+       if (!rc)
+               goto err;
+       rvu_write64(rvu, blkaddr, NIX_AF_RVU_INT_ENA_W1S, ~0ULL);
+
+       /* Register and enable NIX_AF_GEN_INT interrupt */
+       rc = rvu_common_request_irq(rvu, base +  NIX_AF_INT_VEC_GEN,
+                                   "NIX_AF_GEN_INT",
+                                   rvu_nix_af_rvu_gen_handler);
+       if (!rc)
+               goto err;
+       rvu_write64(rvu, blkaddr, NIX_AF_GEN_INT_ENA_W1S, ~0ULL);
+
+       /* Register and enable NIX_AF_ERR_INT interrupt */
+       rc = rvu_common_request_irq(rvu, base + NIX_AF_INT_VEC_AF_ERR,
+                                   "NIX_AF_ERR_INT",
+                                   rvu_nix_af_rvu_err_handler);
+       if (!rc)
+               goto err;
+       rvu_write64(rvu, blkaddr, NIX_AF_ERR_INT_ENA_W1S, ~0ULL);
+
+       /* Register and enable NIX_AF_RAS interrupt */
+       rc = rvu_common_request_irq(rvu, base + NIX_AF_INT_VEC_POISON,
+                                   "NIX_AF_RAS",
+                                   rvu_nix_af_rvu_ras_handler);
+       if (!rc)
+               goto err;
+       rvu_write64(rvu, blkaddr, NIX_AF_RAS_ENA_W1S, ~0ULL);
+
+       return 0;
+err:
+       rvu_nix_unregister_interrupts(rvu);
+       return rc;
+}
+
+static int rvu_nix_report_show(struct devlink_fmsg *fmsg, void *ctx,
+                              enum nix_af_rvu_health health_reporter)
+{
+       struct rvu_nix_event_ctx *nix_event_context;
+       u64 intr_val;
+       int err;
+
+       nix_event_context = ctx;
+       switch (health_reporter) {
+       case NIX_AF_RVU_INTR:
+               intr_val = nix_event_context->nix_af_rvu_int;
+               err = rvu_report_pair_start(fmsg, "NIX_AF_RVU");
+               if (err)
+                       return err;
+               err = devlink_fmsg_u64_pair_put(fmsg, "\tNIX RVU Interrupt Reg ",
+                                               nix_event_context->nix_af_rvu_int);
+               if (err)
+                       return err;
+               if (intr_val & BIT_ULL(0)) {
+                       err = devlink_fmsg_string_put(fmsg, "\n\tUnmap Slot Error");
+                       if (err)
+                               return err;
+               }
+               err = rvu_report_pair_end(fmsg);
+               if (err)
+                       return err;
+               break;
+       case NIX_AF_RVU_GEN:
+               intr_val = nix_event_context->nix_af_rvu_gen;
+               err = rvu_report_pair_start(fmsg, "NIX_AF_GENERAL");
+               if (err)
+                       return err;
+               err = devlink_fmsg_u64_pair_put(fmsg, "\tNIX General Interrupt Reg ",
+                                               nix_event_context->nix_af_rvu_gen);
+               if (err)
+                       return err;
+               if (intr_val & BIT_ULL(0)) {
+                       err = devlink_fmsg_string_put(fmsg, "\n\tRx multicast pkt drop");
+                       if (err)
+                               return err;
+               }
+               if (intr_val & BIT_ULL(1)) {
+                       err = devlink_fmsg_string_put(fmsg, "\n\tRx mirror pkt drop");
+                       if (err)
+                               return err;
+               }
+               if (intr_val & BIT_ULL(4)) {
+                       err = devlink_fmsg_string_put(fmsg, "\n\tSMQ flush done");
+                       if (err)
+                               return err;
+               }
+               err = rvu_report_pair_end(fmsg);
+               if (err)
+                       return err;
+               break;
+       case NIX_AF_RVU_ERR:
+               intr_val = nix_event_context->nix_af_rvu_err;
+               err = rvu_report_pair_start(fmsg, "NIX_AF_ERR");
+               if (err)
+                       return err;
+               err = devlink_fmsg_u64_pair_put(fmsg, "\tNIX Error Interrupt Reg ",
+                                               nix_event_context->nix_af_rvu_err);
+               if (err)
+                       return err;
+               if (intr_val & BIT_ULL(14)) {
+                       err = devlink_fmsg_string_put(fmsg, "\n\tFault on NIX_AQ_INST_S read");
+                       if (err)
+                               return err;
+               }
+               if (intr_val & BIT_ULL(13)) {
+                       err = devlink_fmsg_string_put(fmsg, "\n\tFault on NIX_AQ_RES_S write");
+                       if (err)
+                               return err;
+               }
+               if (intr_val & BIT_ULL(12)) {
+                       err = devlink_fmsg_string_put(fmsg, "\n\tAQ Doorbell Error");
+                       if (err)
+                               return err;
+               }
+               if (intr_val & BIT_ULL(6)) {
+                       err = devlink_fmsg_string_put(fmsg, "\n\tRx on unmapped PF_FUNC");
+                       if (err)
+                               return err;
+               }
+               if (intr_val & BIT_ULL(5)) {
+                       err = devlink_fmsg_string_put(fmsg, "\n\tRx multicast replication error");
+                       if (err)
+                               return err;
+               }
+               if (intr_val & BIT_ULL(4)) {
+                       err = devlink_fmsg_string_put(fmsg, "\n\tFault on NIX_RX_MCE_S read");
+                       if (err)
+                               return err;
+               }
+               if (intr_val & BIT_ULL(3)) {
+                       err = devlink_fmsg_string_put(fmsg, "\n\tFault on multicast WQE read");
+                       if (err)
+                               return err;
+               }
+               if (intr_val & BIT_ULL(2)) {
+                       err = devlink_fmsg_string_put(fmsg, "\n\tFault on mirror WQE read");
+                       if (err)
+                               return err;
+               }
+               if (intr_val & BIT_ULL(1)) {
+                       err = devlink_fmsg_string_put(fmsg, "\n\tFault on mirror pkt write");
+                       if (err)
+                               return err;
+               }
+               if (intr_val & BIT_ULL(0)) {
+                       err = devlink_fmsg_string_put(fmsg, "\n\tFault on multicast pkt write");
+                       if (err)
+                               return err;
+               }
+               err = rvu_report_pair_end(fmsg);
+               if (err)
+                       return err;
+               break;
+       case NIX_AF_RVU_RAS:
+               intr_val = nix_event_context->nix_af_rvu_ras;
+               err = rvu_report_pair_start(fmsg, "NIX_AF_RAS");
+               if (err)
+                       return err;
+               err = devlink_fmsg_u64_pair_put(fmsg, "\tNIX RAS Interrupt Reg ",
+                                               nix_event_context->nix_af_rvu_ras);
+               if (err)
+                       return err;
+               err = devlink_fmsg_string_put(fmsg, "\n\tPoison Data on:");
+               if (err)
+                       return err;
+               if (intr_val & BIT_ULL(34)) {
+                       err = devlink_fmsg_string_put(fmsg, "\n\tNIX_AQ_INST_S");
+                       if (err)
+                               return err;
+               }
+               if (intr_val & BIT_ULL(33)) {
+                       err = devlink_fmsg_string_put(fmsg, "\n\tNIX_AQ_RES_S");
+                       if (err)
+                               return err;
+               }
+               if (intr_val & BIT_ULL(32)) {
+                       err = devlink_fmsg_string_put(fmsg, "\n\tHW ctx");
+                       if (err)
+                               return err;
+               }
+               if (intr_val & BIT_ULL(4)) {
+                       err = devlink_fmsg_string_put(fmsg, "\n\tPacket from mirror buffer");
+                       if (err)
+                               return err;
+               }
+               if (intr_val & BIT_ULL(3)) {
+                       err = devlink_fmsg_string_put(fmsg, "\n\tPacket from multicast buffer");
+                       if (err)
+                               return err;
+               }
+               if (intr_val & BIT_ULL(2)) {
+                       err = devlink_fmsg_string_put(fmsg, "\n\tWQE read from mirror buffer");
+                       if (err)
+                               return err;
+               }
+               if (intr_val & BIT_ULL(1)) {
+                       err = devlink_fmsg_string_put(fmsg, "\n\tWQE read from multicast buffer");
+                       if (err)
+                               return err;
+               }
+               if (intr_val & BIT_ULL(0)) {
+                       err = devlink_fmsg_string_put(fmsg, "\n\tNIX_RX_MCE_S read");
+                       if (err)
+                               return err;
+               }
+               err = rvu_report_pair_end(fmsg);
+               if (err)
+                       return err;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int rvu_hw_nix_intr_dump(struct devlink_health_reporter *reporter,
+                               struct devlink_fmsg *fmsg, void *ctx,
+                               struct netlink_ext_ack *netlink_extack)
+{
+       struct rvu *rvu = devlink_health_reporter_priv(reporter);
+       struct rvu_devlink *rvu_dl = rvu->rvu_dl;
+       struct rvu_nix_event_ctx *nix_ctx;
+
+       nix_ctx = rvu_dl->rvu_nix_health_reporter->nix_event_ctx;
+
+       return ctx ? rvu_nix_report_show(fmsg, ctx, NIX_AF_RVU_INTR) :
+                    rvu_nix_report_show(fmsg, nix_ctx, NIX_AF_RVU_INTR);
+}
+
+static int rvu_hw_nix_intr_recover(struct devlink_health_reporter *reporter,
+                                  void *ctx, struct netlink_ext_ack *netlink_extack)
+{
+       struct rvu *rvu = devlink_health_reporter_priv(reporter);
+       struct rvu_nix_event_ctx *nix_event_ctx = ctx;
+       int blkaddr;
+
+       blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0);
+       if (blkaddr < 0)
+               return blkaddr;
+
+       if (nix_event_ctx->nix_af_rvu_int)
+               rvu_write64(rvu, blkaddr, NIX_AF_RVU_INT_ENA_W1S, ~0ULL);
+
+       return 0;
+}
+
+static int rvu_hw_nix_gen_dump(struct devlink_health_reporter *reporter,
+                              struct devlink_fmsg *fmsg, void *ctx,
+                              struct netlink_ext_ack *netlink_extack)
+{
+       struct rvu *rvu = devlink_health_reporter_priv(reporter);
+       struct rvu_devlink *rvu_dl = rvu->rvu_dl;
+       struct rvu_nix_event_ctx *nix_ctx;
+
+       nix_ctx = rvu_dl->rvu_nix_health_reporter->nix_event_ctx;
+
+       return ctx ? rvu_nix_report_show(fmsg, ctx, NIX_AF_RVU_GEN) :
+                    rvu_nix_report_show(fmsg, nix_ctx, NIX_AF_RVU_GEN);
+}
+
+static int rvu_hw_nix_gen_recover(struct devlink_health_reporter *reporter,
+                                 void *ctx, struct netlink_ext_ack *netlink_extack)
+{
+       struct rvu *rvu = devlink_health_reporter_priv(reporter);
+       struct rvu_nix_event_ctx *nix_event_ctx = ctx;
+       int blkaddr;
+
+       blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0);
+       if (blkaddr < 0)
+               return blkaddr;
+
+       if (nix_event_ctx->nix_af_rvu_gen)
+               rvu_write64(rvu, blkaddr, NIX_AF_GEN_INT_ENA_W1S, ~0ULL);
+
+       return 0;
+}
+
+static int rvu_hw_nix_err_dump(struct devlink_health_reporter *reporter,
+                              struct devlink_fmsg *fmsg, void *ctx,
+                              struct netlink_ext_ack *netlink_extack)
+{
+       struct rvu *rvu = devlink_health_reporter_priv(reporter);
+       struct rvu_devlink *rvu_dl = rvu->rvu_dl;
+       struct rvu_nix_event_ctx *nix_ctx;
+
+       nix_ctx = rvu_dl->rvu_nix_health_reporter->nix_event_ctx;
+
+       return ctx ? rvu_nix_report_show(fmsg, ctx, NIX_AF_RVU_ERR) :
+                    rvu_nix_report_show(fmsg, nix_ctx, NIX_AF_RVU_ERR);
+}
+
+static int rvu_hw_nix_err_recover(struct devlink_health_reporter *reporter,
+                                 void *ctx, struct netlink_ext_ack *netlink_extack)
+{
+       struct rvu *rvu = devlink_health_reporter_priv(reporter);
+       struct rvu_nix_event_ctx *nix_event_ctx = ctx;
+       int blkaddr;
+
+       blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0);
+       if (blkaddr < 0)
+               return blkaddr;
+
+       if (nix_event_ctx->nix_af_rvu_err)
+               rvu_write64(rvu, blkaddr, NIX_AF_ERR_INT_ENA_W1S, ~0ULL);
+
+       return 0;
+}
+
+static int rvu_hw_nix_ras_dump(struct devlink_health_reporter *reporter,
+                              struct devlink_fmsg *fmsg, void *ctx,
+                              struct netlink_ext_ack *netlink_extack)
+{
+       struct rvu *rvu = devlink_health_reporter_priv(reporter);
+       struct rvu_devlink *rvu_dl = rvu->rvu_dl;
+       struct rvu_nix_event_ctx *nix_ctx;
+
+       nix_ctx = rvu_dl->rvu_nix_health_reporter->nix_event_ctx;
+
+       return ctx ? rvu_nix_report_show(fmsg, ctx, NIX_AF_RVU_RAS) :
+                    rvu_nix_report_show(fmsg, nix_ctx, NIX_AF_RVU_RAS);
+}
+
+static int rvu_hw_nix_ras_recover(struct devlink_health_reporter *reporter,
+                                 void *ctx, struct netlink_ext_ack *netlink_extack)
+{
+       struct rvu *rvu = devlink_health_reporter_priv(reporter);
+       struct rvu_nix_event_ctx *nix_event_ctx = ctx;
+       int blkaddr;
+
+       blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0);
+       if (blkaddr < 0)
+               return blkaddr;
+
+       if (nix_event_ctx->nix_af_rvu_ras)
+               rvu_write64(rvu, blkaddr, NIX_AF_RAS_ENA_W1S, ~0ULL);
+
+       return 0;
+}
+
+RVU_REPORTERS(hw_nix_intr);
+RVU_REPORTERS(hw_nix_gen);
+RVU_REPORTERS(hw_nix_err);
+RVU_REPORTERS(hw_nix_ras);
+
+static void rvu_nix_health_reporters_destroy(struct rvu_devlink *rvu_dl);
+
+static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl)
+{
+       struct rvu_nix_health_reporters *rvu_reporters;
+       struct rvu_nix_event_ctx *nix_event_context;
+       struct rvu *rvu = rvu_dl->rvu;
+
+       rvu_reporters = kzalloc(sizeof(*rvu_reporters), GFP_KERNEL);
+       if (!rvu_reporters)
+               return -ENOMEM;
+
+       rvu_dl->rvu_nix_health_reporter = rvu_reporters;
+       nix_event_context = kzalloc(sizeof(*nix_event_context), GFP_KERNEL);
+       if (!nix_event_context)
+               return -ENOMEM;
+
+       rvu_reporters->nix_event_ctx = nix_event_context;
+       rvu_reporters->rvu_hw_nix_intr_reporter =
+               devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_intr_reporter_ops, 0, rvu);
+       if (IS_ERR(rvu_reporters->rvu_hw_nix_intr_reporter)) {
+               dev_warn(rvu->dev, "Failed to create hw_nix_intr reporter, err=%ld\n",
+                        PTR_ERR(rvu_reporters->rvu_hw_nix_intr_reporter));
+               return PTR_ERR(rvu_reporters->rvu_hw_nix_intr_reporter);
+       }
+
+       rvu_reporters->rvu_hw_nix_gen_reporter =
+               devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_gen_reporter_ops, 0, rvu);
+       if (IS_ERR(rvu_reporters->rvu_hw_nix_gen_reporter)) {
+               dev_warn(rvu->dev, "Failed to create hw_nix_gen reporter, err=%ld\n",
+                        PTR_ERR(rvu_reporters->rvu_hw_nix_gen_reporter));
+               return PTR_ERR(rvu_reporters->rvu_hw_nix_gen_reporter);
+       }
+
+       rvu_reporters->rvu_hw_nix_err_reporter =
+               devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_err_reporter_ops, 0, rvu);
+       if (IS_ERR(rvu_reporters->rvu_hw_nix_err_reporter)) {
+               dev_warn(rvu->dev, "Failed to create hw_nix_err reporter, err=%ld\n",
+                        PTR_ERR(rvu_reporters->rvu_hw_nix_err_reporter));
+               return PTR_ERR(rvu_reporters->rvu_hw_nix_err_reporter);
+       }
+
+       rvu_reporters->rvu_hw_nix_ras_reporter =
+               devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_ras_reporter_ops, 0, rvu);
+       if (IS_ERR(rvu_reporters->rvu_hw_nix_ras_reporter)) {
+               dev_warn(rvu->dev, "Failed to create hw_nix_ras reporter, err=%ld\n",
+                        PTR_ERR(rvu_reporters->rvu_hw_nix_ras_reporter));
+               return PTR_ERR(rvu_reporters->rvu_hw_nix_ras_reporter);
+       }
+
+       rvu_dl->devlink_wq = create_workqueue("rvu_devlink_wq");
+       if (!rvu_dl->devlink_wq)
+               goto err;
+
+       INIT_WORK(&rvu_reporters->intr_work, rvu_nix_intr_work);
+       INIT_WORK(&rvu_reporters->gen_work, rvu_nix_gen_work);
+       INIT_WORK(&rvu_reporters->err_work, rvu_nix_err_work);
+       INIT_WORK(&rvu_reporters->ras_work, rvu_nix_ras_work);
+
+       return 0;
+err:
+       rvu_nix_health_reporters_destroy(rvu_dl);
+       return -ENOMEM;
+}
+
+static int rvu_nix_health_reporters_create(struct rvu_devlink *rvu_dl)
+{
+       struct rvu *rvu = rvu_dl->rvu;
+       int err;
+
+       err = rvu_nix_register_reporters(rvu_dl);
+       if (err) {
+               dev_warn(rvu->dev, "Failed to create NIX reporters, err=%d\n",
+                        err);
+               return err;
+       }
+       rvu_nix_register_interrupts(rvu);
+
+       return 0;
+}
+
+static void rvu_nix_health_reporters_destroy(struct rvu_devlink *rvu_dl)
+{
+       struct rvu_nix_health_reporters *nix_reporters;
+       struct rvu *rvu = rvu_dl->rvu;
+
+       nix_reporters = rvu_dl->rvu_nix_health_reporter;
+
+       if (!nix_reporters->rvu_hw_nix_ras_reporter)
+               return;
+       if (!IS_ERR_OR_NULL(nix_reporters->rvu_hw_nix_intr_reporter))
+               devlink_health_reporter_destroy(nix_reporters->rvu_hw_nix_intr_reporter);
+
+       if (!IS_ERR_OR_NULL(nix_reporters->rvu_hw_nix_gen_reporter))
+               devlink_health_reporter_destroy(nix_reporters->rvu_hw_nix_gen_reporter);
+
+       if (!IS_ERR_OR_NULL(nix_reporters->rvu_hw_nix_err_reporter))
+               devlink_health_reporter_destroy(nix_reporters->rvu_hw_nix_err_reporter);
+
+       if (!IS_ERR_OR_NULL(nix_reporters->rvu_hw_nix_ras_reporter))
+               devlink_health_reporter_destroy(nix_reporters->rvu_hw_nix_ras_reporter);
+
+       rvu_nix_unregister_interrupts(rvu);
+       kfree(rvu_dl->rvu_nix_health_reporter->nix_event_ctx);
+       kfree(rvu_dl->rvu_nix_health_reporter);
+}
+
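
Usage sketch for the reporters registered above: once probed, they show up in the standard devlink health interface, so "devlink health show" lists them, "devlink health dump show <pci-dev> reporter <name>" runs the dump callbacks that format the interrupt context captured above, and "devlink health recover <pci-dev> reporter <name>" re-enables the corresponding interrupt via the recover callbacks. The reporter names are whatever the RVU_REPORTERS() macro registers, presumably hw_nix_intr, hw_nix_gen, hw_nix_err and hw_nix_ras, mirroring the existing NPA reporters.
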
 static void rvu_npa_intr_work(struct work_struct *work)
 {
        struct rvu_npa_health_reporters *rvu_npa_health_reporter;
@@ -698,9 +1342,14 @@ static void rvu_npa_health_reporters_destroy(struct rvu_devlink *rvu_dl)
 static int rvu_health_reporters_create(struct rvu *rvu)
 {
        struct rvu_devlink *rvu_dl;
+       int err;
 
        rvu_dl = rvu->rvu_dl;
-       return rvu_npa_health_reporters_create(rvu_dl);
+       err = rvu_npa_health_reporters_create(rvu_dl);
+       if (err)
+               return err;
+
+       return rvu_nix_health_reporters_create(rvu_dl);
 }
 
 static void rvu_health_reporters_destroy(struct rvu *rvu)
@@ -712,6 +1361,7 @@ static void rvu_health_reporters_destroy(struct rvu *rvu)
 
        rvu_dl = rvu->rvu_dl;
        rvu_npa_health_reporters_destroy(rvu_dl);
+       rvu_nix_health_reporters_destroy(rvu_dl);
 }
 
 static int rvu_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
index d7578fa..471e57d 100644 (file)
@@ -41,11 +41,38 @@ struct rvu_npa_health_reporters {
        struct work_struct              ras_work;
 };
 
+enum nix_af_rvu_health {
+       NIX_AF_RVU_INTR,
+       NIX_AF_RVU_GEN,
+       NIX_AF_RVU_ERR,
+       NIX_AF_RVU_RAS,
+};
+
+struct rvu_nix_event_ctx {
+       u64 nix_af_rvu_int;
+       u64 nix_af_rvu_gen;
+       u64 nix_af_rvu_err;
+       u64 nix_af_rvu_ras;
+};
+
+struct rvu_nix_health_reporters {
+       struct rvu_nix_event_ctx *nix_event_ctx;
+       struct devlink_health_reporter *rvu_hw_nix_intr_reporter;
+       struct work_struct              intr_work;
+       struct devlink_health_reporter *rvu_hw_nix_gen_reporter;
+       struct work_struct              gen_work;
+       struct devlink_health_reporter *rvu_hw_nix_err_reporter;
+       struct work_struct              err_work;
+       struct devlink_health_reporter *rvu_hw_nix_ras_reporter;
+       struct work_struct              ras_work;
+};
+
 struct rvu_devlink {
        struct devlink *dl;
        struct rvu *rvu;
        struct workqueue_struct *devlink_wq;
        struct rvu_npa_health_reporters *rvu_npa_health_reporter;
+       struct rvu_nix_health_reporters *rvu_nix_health_reporter;
 };
 
 /* Devlink APIs */
index a8dfbb6..b54753e 100644 (file)
@@ -2580,6 +2580,7 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg)
        struct nix_rx_flowkey_alg *field;
        struct nix_rx_flowkey_alg tmp;
        u32 key_type, valid_key;
+       int l4_key_offset = 0;
 
        if (!alg)
                return -EINVAL;
@@ -2712,6 +2713,12 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg)
                                field_marker = false;
                                keyoff_marker = false;
                        }
+
+                       /* TCP/UDP/SCTP and ESP/AH fall at the same offset, so
+                        * remember the TCP key offset within the 40-byte hash key.
+                        */
+                       if (key_type == NIX_FLOW_KEY_TYPE_TCP)
+                               l4_key_offset = key_off;
                        break;
                case NIX_FLOW_KEY_TYPE_NVGRE:
                        field->lid = NPC_LID_LD;
@@ -2783,11 +2790,31 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg)
                        field->ltype_mask = 0xF;
                        field->fn_mask = 1; /* Mask out the first nibble */
                        break;
+               case NIX_FLOW_KEY_TYPE_AH:
+               case NIX_FLOW_KEY_TYPE_ESP:
+                       field->hdr_offset = 0;
+                       field->bytesm1 = 7; /* SPI + sequence number */
+                       field->ltype_mask = 0xF;
+                       field->lid = NPC_LID_LE;
+                       field->ltype_match = NPC_LT_LE_ESP;
+                       if (key_type == NIX_FLOW_KEY_TYPE_AH) {
+                               field->lid = NPC_LID_LD;
+                               field->ltype_match = NPC_LT_LD_AH;
+                               field->hdr_offset = 4;
+                               keyoff_marker = false;
+                       }
+                       break;
                }
                field->ena = 1;
 
                /* Found a valid flow key type */
                if (valid_key) {
+                       /* Use the key offset of TCP/UDP/SCTP fields
+                        * for ESP/AH fields.
+                        */
+                       if (key_type == NIX_FLOW_KEY_TYPE_ESP ||
+                           key_type == NIX_FLOW_KEY_TYPE_AH)
+                               key_off = l4_key_offset;
                        field->key_offset = key_off;
                        memcpy(&alg[nr_field], field, sizeof(*field));
                        max_key_off = max(max_key_off, field->bytesm1 + 1);
index e2153d4..5e15f4f 100644 (file)
@@ -74,6 +74,16 @@ enum npa_af_int_vec_e {
        NPA_AF_INT_VEC_CNT      = 0x5,
 };
 
+/* NIX Admin function Interrupt Vector Enumeration */
+enum nix_af_int_vec_e {
+       NIX_AF_INT_VEC_RVU      = 0x0,
+       NIX_AF_INT_VEC_GEN      = 0x1,
+       NIX_AF_INT_VEC_AQ_DONE  = 0x2,
+       NIX_AF_INT_VEC_AF_ERR   = 0x3,
+       NIX_AF_INT_VEC_POISON   = 0x4,
+       NIX_AF_INT_VEC_CNT      = 0x5,
+};
+
 /**
  * RVU PF Interrupt Vector Enumeration
  */
index aaba045..e0199f0 100644 (file)
@@ -448,10 +448,14 @@ static int otx2_get_rss_hash_opts(struct otx2_nic *pfvf,
                        nfc->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
                break;
        case AH_ESP_V4_FLOW:
+       case AH_ESP_V6_FLOW:
+               if (rss->flowkey_cfg & NIX_FLOW_KEY_TYPE_ESP)
+                       nfc->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+               break;
        case AH_V4_FLOW:
        case ESP_V4_FLOW:
        case IPV4_FLOW:
-       case AH_ESP_V6_FLOW:
+               break;
        case AH_V6_FLOW:
        case ESP_V6_FLOW:
        case IPV6_FLOW:
@@ -459,6 +463,7 @@ static int otx2_get_rss_hash_opts(struct otx2_nic *pfvf,
        default:
                return -EINVAL;
        }
+
        return 0;
 }
 
@@ -527,6 +532,36 @@ static int otx2_set_rss_hash_opts(struct otx2_nic *pfvf,
                        return -EINVAL;
                }
                break;
+       case AH_ESP_V4_FLOW:
+       case AH_ESP_V6_FLOW:
+               switch (nfc->data & rxh_l4) {
+               case 0:
+                       rss_cfg &= ~(NIX_FLOW_KEY_TYPE_ESP |
+                                    NIX_FLOW_KEY_TYPE_AH);
+                       rss_cfg |= NIX_FLOW_KEY_TYPE_VLAN |
+                                  NIX_FLOW_KEY_TYPE_IPV4_PROTO;
+                       break;
+               case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+                       /* If VLAN hashing is also requested for ESP, reject the
+                        * request: the hardware flow key is limited to 40 bytes.
+                        */
+                       if (rss_cfg & NIX_FLOW_KEY_TYPE_VLAN) {
+                               netdev_err(pfvf->netdev,
+                                          "RSS hash of ESP or AH with VLAN is not supported\n");
+                               return -EOPNOTSUPP;
+                       }
+
+                       rss_cfg |= NIX_FLOW_KEY_TYPE_ESP | NIX_FLOW_KEY_TYPE_AH;
+                       /* Disable IPv4 proto hashing since IPv6 SA+DA (32 bytes)
+                        * and ESP SPI+sequence (8 bytes) already consume the
+                        * hardware's 40-byte flow key limit.
+                        */
+                       rss_cfg &= ~NIX_FLOW_KEY_TYPE_IPV4_PROTO;
+                       break;
+               default:
+                       return -EINVAL;
+               }
+               break;
        case IPV4_FLOW:
        case IPV6_FLOW:
                rss_cfg = NIX_FLOW_KEY_TYPE_IPV4 | NIX_FLOW_KEY_TYPE_IPV6;
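The arithmetic behind the two restrictions above: the hardware RSS flow key is 40 bytes, IPv6 SA+DA consume 32 of them and the ESP/AH SPI plus sequence number another 8, which fills the key exactly. That leaves no room for the VLAN tag or the IPv4 protocol field, hence the -EOPNOTSUPP when VLAN hashing is already enabled and the clearing of NIX_FLOW_KEY_TYPE_IPV4_PROTO when ESP/AH hashing is turned on.
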
index 6d2d606..01d3ee4 100644 (file)
@@ -353,7 +353,7 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode,
        /* Setup gmac */
        mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id));
        mcr_new = mcr_cur;
-       mcr_new |= MAC_MCR_MAX_RX_1536 | MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE |
+       mcr_new |= MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE |
                   MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK;
 
        /* Only update control register when needed! */
@@ -759,8 +759,8 @@ static void mtk_get_stats64(struct net_device *dev,
 static inline int mtk_max_frag_size(int mtu)
 {
        /* make sure buf_size will be at least MTK_MAX_RX_LENGTH */
-       if (mtu + MTK_RX_ETH_HLEN < MTK_MAX_RX_LENGTH)
-               mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN;
+       if (mtu + MTK_RX_ETH_HLEN < MTK_MAX_RX_LENGTH_2K)
+               mtu = MTK_MAX_RX_LENGTH_2K - MTK_RX_ETH_HLEN;
 
        return SKB_DATA_ALIGN(MTK_RX_HLEN + mtu) +
                SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
@@ -771,7 +771,7 @@ static inline int mtk_max_buf_size(int frag_size)
        int buf_size = frag_size - NET_SKB_PAD - NET_IP_ALIGN -
                       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
-       WARN_ON(buf_size < MTK_MAX_RX_LENGTH);
+       WARN_ON(buf_size < MTK_MAX_RX_LENGTH_2K);
 
        return buf_size;
 }
@@ -2499,6 +2499,35 @@ static void mtk_uninit(struct net_device *dev)
        mtk_rx_irq_disable(eth, ~0);
 }
 
+static int mtk_change_mtu(struct net_device *dev, int new_mtu)
+{
+       int length = new_mtu + MTK_RX_ETH_HLEN;
+       struct mtk_mac *mac = netdev_priv(dev);
+       struct mtk_eth *eth = mac->hw;
+       u32 mcr_cur, mcr_new;
+
+       if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
+               mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id));
+               mcr_new = mcr_cur & ~MAC_MCR_MAX_RX_MASK;
+
+               if (length <= 1518)
+                       mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_1518);
+               else if (length <= 1536)
+                       mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_1536);
+               else if (length <= 1552)
+                       mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_1552);
+               else
+                       mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_2048);
+
+               if (mcr_new != mcr_cur)
+                       mtk_w32(mac->hw, mcr_new, MTK_MAC_MCR(mac->id));
+       }
+
+       dev->mtu = new_mtu;
+
+       return 0;
+}
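
Illustrative numbers for the threshold selection above: with MTK_RX_ETH_HLEN now defined as ETH_HLEN + ETH_FCS_LEN (14 + 4 = 18 bytes), the default 1500-byte MTU gives a frame length of 1518 and selects MAC_MCR_MAX_RX_1518; MTUs up to 1518 select the 1536 threshold, up to 1534 the 1552 threshold, and anything larger, up to the new 2030-byte max_mtu (MTK_MAX_RX_LENGTH_2K - 18), falls into the 2048 bucket.
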
+
 static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
        struct mtk_mac *mac = netdev_priv(dev);
@@ -2795,6 +2824,7 @@ static const struct net_device_ops mtk_netdev_ops = {
        .ndo_set_mac_address    = mtk_set_mac_address,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_do_ioctl           = mtk_do_ioctl,
+       .ndo_change_mtu         = mtk_change_mtu,
        .ndo_tx_timeout         = mtk_tx_timeout,
        .ndo_get_stats64        = mtk_get_stats64,
        .ndo_fix_features       = mtk_fix_features,
@@ -2896,7 +2926,10 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
        eth->netdev[id]->irq = eth->irq[0];
        eth->netdev[id]->dev.of_node = np;
 
-       eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN;
+       if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628))
+               eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN;
+       else
+               eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH_2K - MTK_RX_ETH_HLEN;
 
        return 0;
 
index 454cfcd..fd3cec8 100644 (file)
 #include <linux/phylink.h>
 
 #define MTK_QDMA_PAGE_SIZE     2048
-#define        MTK_MAX_RX_LENGTH       1536
+#define MTK_MAX_RX_LENGTH      1536
+#define MTK_MAX_RX_LENGTH_2K   2048
 #define MTK_TX_DMA_BUF_LEN     0x3fff
 #define MTK_DMA_SIZE           256
 #define MTK_NAPI_WEIGHT                64
 #define MTK_MAC_COUNT          2
-#define MTK_RX_ETH_HLEN                (VLAN_ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
+#define MTK_RX_ETH_HLEN                (ETH_HLEN + ETH_FCS_LEN)
 #define MTK_RX_HLEN            (NET_SKB_PAD + MTK_RX_ETH_HLEN + NET_IP_ALIGN)
 #define MTK_DMA_DUMMY_DESC     0xffffffff
 #define MTK_DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | \
 
 /* Mac control registers */
 #define MTK_MAC_MCR(x)         (0x10100 + (x * 0x100))
-#define MAC_MCR_MAX_RX_1536    BIT(24)
+#define MAC_MCR_MAX_RX_MASK    GENMASK(25, 24)
+#define MAC_MCR_MAX_RX(_x)     (MAC_MCR_MAX_RX_MASK & ((_x) << 24))
+#define MAC_MCR_MAX_RX_1518    0x0
+#define MAC_MCR_MAX_RX_1536    0x1
+#define MAC_MCR_MAX_RX_1552    0x2
+#define MAC_MCR_MAX_RX_2048    0x3
 #define MAC_MCR_IPG_CFG                (BIT(18) | BIT(16))
 #define MAC_MCR_FORCE_MODE     BIT(15)
 #define MAC_MCR_TX_EN          BIT(14)
index 134bd03..fcfc0b1 100644 (file)
@@ -16,7 +16,8 @@ mlx5_core-y :=        main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
                transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
                fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
                lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \
-               diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o fw_reset.o
+               diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
+               fw_reset.o qos.o
 
 #
 # Netdev basic
@@ -25,7 +26,8 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
                en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
                en_selftest.o en/port.o en/monitor_stats.o en/health.o \
                en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \
-               en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o
+               en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \
+               en/qos.o
 
 #
 # Netdev extra
index 055baf3..26e578a 100644 (file)
@@ -55,6 +55,7 @@
 #include "en_stats.h"
 #include "en/dcbnl.h"
 #include "en/fs.h"
+#include "en/qos.h"
 #include "lib/hv_vhca.h"
 
 extern const struct net_device_ops mlx5e_netdev_ops;
@@ -161,6 +162,9 @@ do {                                                            \
                            ##__VA_ARGS__);                     \
 } while (0)
 
+#define mlx5e_state_dereference(priv, p) \
+       rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock))
+
 enum mlx5e_rq_group {
        MLX5E_RQ_GROUP_REGULAR,
        MLX5E_RQ_GROUP_XSK,
@@ -663,11 +667,13 @@ struct mlx5e_channel {
        struct mlx5e_xdpsq         rq_xdpsq;
        struct mlx5e_txqsq         sq[MLX5E_MAX_NUM_TC];
        struct mlx5e_icosq         icosq;   /* internal control operations */
+       struct mlx5e_txqsq __rcu * __rcu *qos_sqs;
        bool                       xdp;
        struct napi_struct         napi;
        struct device             *pdev;
        struct net_device         *netdev;
        __be32                     mkey_be;
+       u16                        qos_sqs_size;
        u8                         num_tc;
        u8                         lag_port;
 
@@ -756,6 +762,8 @@ struct mlx5e_modify_sq_param {
        int next_state;
        int rl_update;
        int rl_index;
+       bool qos_update;
+       u16 qos_queue_group_id;
 };
 
 #if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
@@ -788,10 +796,20 @@ struct mlx5e_scratchpad {
        cpumask_var_t cpumask;
 };
 
+struct mlx5e_htb {
+       DECLARE_HASHTABLE(qos_tc2node, order_base_2(MLX5E_QOS_MAX_LEAF_NODES));
+       DECLARE_BITMAP(qos_used_qids, MLX5E_QOS_MAX_LEAF_NODES);
+       struct mlx5e_sq_stats **qos_sq_stats;
+       u16 max_qos_sqs;
+       u16 maj_id;
+       u16 defcls;
+};
+
 struct mlx5e_priv {
        /* priv data path fields - start */
        /* +1 for port ptp ts */
-       struct mlx5e_txqsq *txq2sq[(MLX5E_MAX_NUM_CHANNELS + 1) * MLX5E_MAX_NUM_TC];
+       struct mlx5e_txqsq *txq2sq[(MLX5E_MAX_NUM_CHANNELS + 1) * MLX5E_MAX_NUM_TC +
+                                  MLX5E_QOS_MAX_LEAF_NODES];
        int channel_tc2realtxq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC];
        int port_ptp_tc2realtxq[MLX5E_MAX_NUM_TC];
 #ifdef CONFIG_MLX5_CORE_EN_DCB
@@ -859,6 +877,7 @@ struct mlx5e_priv {
        struct mlx5e_hv_vhca_stats_agent stats_agent;
 #endif
        struct mlx5e_scratchpad    scratchpad;
+       struct mlx5e_htb           htb;
 };
 
 struct mlx5e_rx_handlers {
@@ -986,6 +1005,7 @@ int mlx5e_safe_switch_channels(struct mlx5e_priv *priv,
                               struct mlx5e_channels *new_chs,
                               mlx5e_fp_preactivate preactivate,
                               void *context);
+int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv);
 int mlx5e_num_channels_changed(struct mlx5e_priv *priv);
 int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context);
 void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
@@ -1010,6 +1030,9 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq);
 
 int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
                    struct mlx5e_modify_sq_param *p);
+int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix,
+                    struct mlx5e_params *params, struct mlx5e_sq_param *param,
+                    struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, u16 qos_qid);
 void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq);
 void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq);
 void mlx5e_free_txqsq(struct mlx5e_txqsq *sq);
@@ -1020,8 +1043,10 @@ struct mlx5e_create_sq_param;
 int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev,
                        struct mlx5e_sq_param *param,
                        struct mlx5e_create_sq_param *csp,
+                       u16 qos_queue_group_id,
                        u32 *sqn);
 void mlx5e_tx_err_cqe_work(struct work_struct *recover_work);
+void mlx5e_close_txqsq(struct mlx5e_txqsq *sq);
 
 static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev)
 {
index 807147d..ea2cfb0 100644 (file)
@@ -118,6 +118,8 @@ void mlx5e_build_rq_param(struct mlx5e_priv *priv,
                          struct mlx5e_rq_param *param);
 void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
                                 struct mlx5e_sq_param *param);
+void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params,
+                         struct mlx5e_sq_param *param);
 void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
                             struct mlx5e_params *params,
                             struct mlx5e_xsk_param *xsk,
index 2a2bac3..eeddd11 100644 (file)
@@ -261,7 +261,7 @@ static int mlx5e_ptp_open_txqsq(struct mlx5e_port_ptp *c, u32 tisn,
        csp.min_inline_mode = txqsq->min_inline_mode;
        csp.ts_cqe_to_dest_cqn = ptpsq->ts_cq.mcq.cqn;
 
-       err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, &txqsq->sqn);
+       err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, 0, &txqsq->sqn);
        if (err)
                goto err_free_txqsq;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
new file mode 100644 (file)
index 0000000..12d7ad0
--- /dev/null
@@ -0,0 +1,984 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#include "en.h"
+#include "params.h"
+#include "../qos.h"
+
+#define BYTES_IN_MBIT 125000
+
+int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev)
+{
+       return min(MLX5E_QOS_MAX_LEAF_NODES, mlx5_qos_max_leaf_nodes(mdev));
+}
+
+int mlx5e_qos_cur_leaf_nodes(struct mlx5e_priv *priv)
+{
+       int last = find_last_bit(priv->htb.qos_used_qids, mlx5e_qos_max_leaf_nodes(priv->mdev));
+
+       return last == mlx5e_qos_max_leaf_nodes(priv->mdev) ? 0 : last + 1;
+}
+
+/* Software representation of the QoS tree (internal to this file) */
+
+static int mlx5e_find_unused_qos_qid(struct mlx5e_priv *priv)
+{
+       int size = mlx5e_qos_max_leaf_nodes(priv->mdev);
+       int res;
+
+       WARN_ONCE(!mutex_is_locked(&priv->state_lock), "%s: state_lock is not held\n", __func__);
+       res = find_first_zero_bit(priv->htb.qos_used_qids, size);
+
+       return res == size ? -ENOSPC : res;
+}
+
+struct mlx5e_qos_node {
+       struct hlist_node hnode;
+       struct rcu_head rcu;
+       struct mlx5e_qos_node *parent;
+       u64 rate;
+       u32 bw_share;
+       u32 max_average_bw;
+       u32 hw_id;
+       u32 classid; /* 16-bit, except root. */
+       u16 qid;
+};
+
+#define MLX5E_QOS_QID_INNER 0xffff
+#define MLX5E_HTB_CLASSID_ROOT 0xffffffff
+
+static struct mlx5e_qos_node *
+mlx5e_sw_node_create_leaf(struct mlx5e_priv *priv, u16 classid, u16 qid,
+                         struct mlx5e_qos_node *parent)
+{
+       struct mlx5e_qos_node *node;
+
+       node = kzalloc(sizeof(*node), GFP_KERNEL);
+       if (!node)
+               return ERR_PTR(-ENOMEM);
+
+       node->parent = parent;
+
+       node->qid = qid;
+       __set_bit(qid, priv->htb.qos_used_qids);
+
+       node->classid = classid;
+       hash_add_rcu(priv->htb.qos_tc2node, &node->hnode, classid);
+
+       mlx5e_update_tx_netdev_queues(priv);
+
+       return node;
+}
+
+static struct mlx5e_qos_node *mlx5e_sw_node_create_root(struct mlx5e_priv *priv)
+{
+       struct mlx5e_qos_node *node;
+
+       node = kzalloc(sizeof(*node), GFP_KERNEL);
+       if (!node)
+               return ERR_PTR(-ENOMEM);
+
+       node->qid = MLX5E_QOS_QID_INNER;
+       node->classid = MLX5E_HTB_CLASSID_ROOT;
+       hash_add_rcu(priv->htb.qos_tc2node, &node->hnode, node->classid);
+
+       return node;
+}
+
+static struct mlx5e_qos_node *mlx5e_sw_node_find(struct mlx5e_priv *priv, u32 classid)
+{
+       struct mlx5e_qos_node *node = NULL;
+
+       hash_for_each_possible(priv->htb.qos_tc2node, node, hnode, classid) {
+               if (node->classid == classid)
+                       break;
+       }
+
+       return node;
+}
+
+static struct mlx5e_qos_node *mlx5e_sw_node_find_rcu(struct mlx5e_priv *priv, u32 classid)
+{
+       struct mlx5e_qos_node *node = NULL;
+
+       hash_for_each_possible_rcu(priv->htb.qos_tc2node, node, hnode, classid) {
+               if (node->classid == classid)
+                       break;
+       }
+
+       return node;
+}
+
+static void mlx5e_sw_node_delete(struct mlx5e_priv *priv, struct mlx5e_qos_node *node)
+{
+       hash_del_rcu(&node->hnode);
+       if (node->qid != MLX5E_QOS_QID_INNER) {
+               __clear_bit(node->qid, priv->htb.qos_used_qids);
+               mlx5e_update_tx_netdev_queues(priv);
+       }
+       kfree_rcu(node, rcu);
+}
+
+/* TX datapath API */
+
+static u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid)
+{
+       /* These channel params are safe to access from the datapath, because:
+        * 1. This function is called only after checking priv->htb.maj_id != 0,
+        *    and the number of queues can't change while HTB offload is active.
+        * 2. When priv->htb.maj_id becomes 0, synchronize_rcu waits for
+        *    mlx5e_select_queue to finish while holding priv->state_lock,
+        *    preventing other code from changing the number of queues.
+        */
+       bool is_ptp = MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS);
+
+       return (chs->params.num_channels + is_ptp) * chs->params.num_tc + qid;
+}
+
+int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid)
+{
+       struct mlx5e_qos_node *node;
+       u16 qid;
+       int res;
+
+       rcu_read_lock();
+
+       node = mlx5e_sw_node_find_rcu(priv, classid);
+       if (!node) {
+               res = -ENOENT;
+               goto out;
+       }
+       qid = READ_ONCE(node->qid);
+       if (qid == MLX5E_QOS_QID_INNER) {
+               res = -EINVAL;
+               goto out;
+       }
+       res = mlx5e_qid_from_qos(&priv->channels, qid);
+
+out:
+       rcu_read_unlock();
+       return res;
+}
+
+static struct mlx5e_txqsq *mlx5e_get_qos_sq(struct mlx5e_priv *priv, int qid)
+{
+       struct mlx5e_params *params = &priv->channels.params;
+       struct mlx5e_txqsq __rcu **qos_sqs;
+       struct mlx5e_channel *c;
+       int ix;
+
+       ix = qid % params->num_channels;
+       qid /= params->num_channels;
+       c = priv->channels.c[ix];
+
+       qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs);
+       return mlx5e_state_dereference(priv, qos_sqs[qid]);
+}
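
To make the two mappings above concrete (illustrative numbers only): with 4 channels, 2 TCs and the PTP queue disabled, the regular txqs occupy indices 0..7, so QoS queue id 5 lands on netdev txq (4 + 0) * 2 + 5 = 13 via mlx5e_qid_from_qos(). On the channel side the same qid is served by channel 5 % 4 = 1 in per-channel slot 5 / 4 = 1, which is how mlx5e_get_qos_sq() locates its SQ.
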
+
+/* SQ lifecycle */
+
+static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,
+                            struct mlx5e_qos_node *node)
+{
+       struct mlx5e_create_cq_param ccp = {};
+       struct mlx5e_txqsq __rcu **qos_sqs;
+       struct mlx5e_sq_param param_sq;
+       struct mlx5e_cq_param param_cq;
+       int txq_ix, ix, qid, err = 0;
+       struct mlx5e_params *params;
+       struct mlx5e_channel *c;
+       struct mlx5e_txqsq *sq;
+
+       params = &chs->params;
+
+       txq_ix = mlx5e_qid_from_qos(chs, node->qid);
+
+       WARN_ON(node->qid > priv->htb.max_qos_sqs);
+       if (node->qid == priv->htb.max_qos_sqs) {
+               struct mlx5e_sq_stats *stats, **stats_list = NULL;
+
+               if (priv->htb.max_qos_sqs == 0) {
+                       stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
+                                             sizeof(*stats_list),
+                                             GFP_KERNEL);
+                       if (!stats_list)
+                               return -ENOMEM;
+               }
+               stats = kzalloc(sizeof(*stats), GFP_KERNEL);
+               if (!stats) {
+                       kvfree(stats_list);
+                       return -ENOMEM;
+               }
+               if (stats_list)
+                       WRITE_ONCE(priv->htb.qos_sq_stats, stats_list);
+               WRITE_ONCE(priv->htb.qos_sq_stats[node->qid], stats);
+               /* Order max_qos_sqs increment after writing the array pointer.
+                * Pairs with smp_load_acquire in en_stats.c.
+                */
+               smp_store_release(&priv->htb.max_qos_sqs, priv->htb.max_qos_sqs + 1);
+       }
+
+       ix = node->qid % params->num_channels;
+       qid = node->qid / params->num_channels;
+       c = chs->c[ix];
+
+       qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs);
+       sq = kzalloc(sizeof(*sq), GFP_KERNEL);
+       if (!sq)
+               return -ENOMEM;
+
+       mlx5e_build_create_cq_param(&ccp, c);
+
+       memset(&param_sq, 0, sizeof(param_sq));
+       memset(&param_cq, 0, sizeof(param_cq));
+       mlx5e_build_sq_param(priv, params, &param_sq);
+       mlx5e_build_tx_cq_param(priv, params, &param_cq);
+       err = mlx5e_open_cq(priv, params->tx_cq_moderation, &param_cq, &ccp, &sq->cq);
+       if (err)
+               goto err_free_sq;
+       err = mlx5e_open_txqsq(c, priv->tisn[c->lag_port][0], txq_ix, params,
+                              &param_sq, sq, 0, node->hw_id, node->qid);
+       if (err)
+               goto err_close_cq;
+
+       rcu_assign_pointer(qos_sqs[qid], sq);
+
+       return 0;
+
+err_close_cq:
+       mlx5e_close_cq(&sq->cq);
+err_free_sq:
+       kfree(sq);
+       return err;
+}
+
+static void mlx5e_activate_qos_sq(struct mlx5e_priv *priv, struct mlx5e_qos_node *node)
+{
+       struct mlx5e_txqsq *sq;
+
+       sq = mlx5e_get_qos_sq(priv, node->qid);
+
+       WRITE_ONCE(priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, node->qid)], sq);
+
+       /* Make the change to txq2sq visible before the queue is started.
+        * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
+        * which pairs with this barrier.
+        */
+       smp_wmb();
+
+       qos_dbg(priv->mdev, "Activate QoS SQ qid %u\n", node->qid);
+       mlx5e_activate_txqsq(sq);
+}
+
+static void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid)
+{
+       struct mlx5e_txqsq *sq;
+
+       sq = mlx5e_get_qos_sq(priv, qid);
+       if (!sq) /* Handle the case when the SQ failed to open. */
+               return;
+
+       qos_dbg(priv->mdev, "Deactivate QoS SQ qid %u\n", qid);
+       mlx5e_deactivate_txqsq(sq);
+
+       /* The queue is disabled, no synchronization with datapath is needed. */
+       priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL;
+}
+
+static void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid)
+{
+       struct mlx5e_txqsq __rcu **qos_sqs;
+       struct mlx5e_params *params;
+       struct mlx5e_channel *c;
+       struct mlx5e_txqsq *sq;
+       int ix;
+
+       params = &priv->channels.params;
+
+       ix = qid % params->num_channels;
+       qid /= params->num_channels;
+       c = priv->channels.c[ix];
+       qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs);
+       sq = rcu_replace_pointer(qos_sqs[qid], NULL, lockdep_is_held(&priv->state_lock));
+       if (!sq) /* Handle the case when the SQ failed to open. */
+               return;
+
+       synchronize_rcu(); /* Sync with NAPI. */
+
+       mlx5e_close_txqsq(sq);
+       mlx5e_close_cq(&sq->cq);
+       kfree(sq);
+}
+
+void mlx5e_qos_close_queues(struct mlx5e_channel *c)
+{
+       struct mlx5e_txqsq __rcu **qos_sqs;
+       int i;
+
+       qos_sqs = rcu_replace_pointer(c->qos_sqs, NULL, lockdep_is_held(&c->priv->state_lock));
+       if (!qos_sqs)
+               return;
+       synchronize_rcu(); /* Sync with NAPI. */
+
+       for (i = 0; i < c->qos_sqs_size; i++) {
+               struct mlx5e_txqsq *sq;
+
+               sq = mlx5e_state_dereference(c->priv, qos_sqs[i]);
+               if (!sq) /* Handle the case when the SQ failed to open. */
+                       continue;
+
+               mlx5e_close_txqsq(sq);
+               mlx5e_close_cq(&sq->cq);
+               kfree(sq);
+       }
+
+       kvfree(qos_sqs);
+}
+
+static void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs)
+{
+       int i;
+
+       for (i = 0; i < chs->num; i++)
+               mlx5e_qos_close_queues(chs->c[i]);
+}
+
+static int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
+{
+       u16 qos_sqs_size;
+       int i;
+
+       qos_sqs_size = DIV_ROUND_UP(mlx5e_qos_max_leaf_nodes(priv->mdev), chs->num);
+
+       for (i = 0; i < chs->num; i++) {
+               struct mlx5e_txqsq **sqs;
+
+               sqs = kvcalloc(qos_sqs_size, sizeof(struct mlx5e_txqsq *), GFP_KERNEL);
+               if (!sqs)
+                       goto err_free;
+
+               WRITE_ONCE(chs->c[i]->qos_sqs_size, qos_sqs_size);
+               smp_wmb(); /* Pairs with mlx5e_napi_poll. */
+               rcu_assign_pointer(chs->c[i]->qos_sqs, sqs);
+       }
+
+       return 0;
+
+err_free:
+       while (--i >= 0) {
+               struct mlx5e_txqsq **sqs;
+
+               sqs = rcu_replace_pointer(chs->c[i]->qos_sqs, NULL,
+                                         lockdep_is_held(&priv->state_lock));
+
+               synchronize_rcu(); /* Sync with NAPI. */
+               kvfree(sqs);
+       }
+       return -ENOMEM;
+}
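
Continuing the illustrative numbers: if the device exposes, say, 64 QoS leaf nodes and 4 channels are open, DIV_ROUND_UP(64, 4) = 16 slots are allocated per channel, so every possible qid has a (channel, slot) home regardless of how the qids are actually distributed at runtime.
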
+
+int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
+{
+       struct mlx5e_qos_node *node = NULL;
+       int bkt, err;
+
+       if (!priv->htb.maj_id)
+               return 0;
+
+       err = mlx5e_qos_alloc_queues(priv, chs);
+       if (err)
+               return err;
+
+       hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) {
+               if (node->qid == MLX5E_QOS_QID_INNER)
+                       continue;
+               err = mlx5e_open_qos_sq(priv, chs, node);
+               if (err) {
+                       mlx5e_qos_close_all_queues(chs);
+                       return err;
+               }
+       }
+
+       return 0;
+}
+
+void mlx5e_qos_activate_queues(struct mlx5e_priv *priv)
+{
+       struct mlx5e_qos_node *node = NULL;
+       int bkt;
+
+       hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) {
+               if (node->qid == MLX5E_QOS_QID_INNER)
+                       continue;
+               mlx5e_activate_qos_sq(priv, node);
+       }
+}
+
+void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c)
+{
+       struct mlx5e_params *params = &c->priv->channels.params;
+       struct mlx5e_txqsq __rcu **qos_sqs;
+       int i;
+
+       qos_sqs = mlx5e_state_dereference(c->priv, c->qos_sqs);
+       if (!qos_sqs)
+               return;
+
+       for (i = 0; i < c->qos_sqs_size; i++) {
+               u16 qid = params->num_channels * i + c->ix;
+               struct mlx5e_txqsq *sq;
+
+               sq = mlx5e_state_dereference(c->priv, qos_sqs[i]);
+               if (!sq) /* Handle the case when the SQ failed to open. */
+                       continue;
+
+               qos_dbg(c->mdev, "Deactivate QoS SQ qid %u\n", qid);
+               mlx5e_deactivate_txqsq(sq);
+
+               /* The queue is disabled, no synchronization with datapath is needed. */
+               c->priv->txq2sq[mlx5e_qid_from_qos(&c->priv->channels, qid)] = NULL;
+       }
+}
+
+static void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs)
+{
+       int i;
+
+       for (i = 0; i < chs->num; i++)
+               mlx5e_qos_deactivate_queues(chs->c[i]);
+}
+
+/* HTB API */
+
+int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls,
+                      struct netlink_ext_ack *extack)
+{
+       struct mlx5e_qos_node *root;
+       bool opened;
+       int err;
+
+       qos_dbg(priv->mdev, "TC_HTB_CREATE handle %04x:, default :%04x\n", htb_maj_id, htb_defcls);
+
+       if (!mlx5_qos_is_supported(priv->mdev)) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Missing QoS capabilities. Try disabling SRIOV or use a supported device.");
+               return -EOPNOTSUPP;
+       }
+
+       opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+       if (opened) {
+               err = mlx5e_qos_alloc_queues(priv, &priv->channels);
+               if (err)
+                       return err;
+       }
+
+       root = mlx5e_sw_node_create_root(priv);
+       if (IS_ERR(root)) {
+               err = PTR_ERR(root);
+               goto err_free_queues;
+       }
+
+       err = mlx5_qos_create_root_node(priv->mdev, &root->hw_id);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "Firmware error. Try upgrading firmware.");
+               goto err_sw_node_delete;
+       }
+
+       WRITE_ONCE(priv->htb.defcls, htb_defcls);
+       /* Order maj_id after defcls - pairs with
+        * mlx5e_select_queue/mlx5e_select_htb_queues.
+        */
+       smp_store_release(&priv->htb.maj_id, htb_maj_id);
+
+       return 0;
+
+err_sw_node_delete:
+       mlx5e_sw_node_delete(priv, root);
+
+err_free_queues:
+       if (opened)
+               mlx5e_qos_close_all_queues(&priv->channels);
+       return err;
+}
+
+int mlx5e_htb_root_del(struct mlx5e_priv *priv)
+{
+       struct mlx5e_qos_node *root;
+       int err;
+
+       qos_dbg(priv->mdev, "TC_HTB_DESTROY\n");
+
+       WRITE_ONCE(priv->htb.maj_id, 0);
+       synchronize_rcu(); /* Sync with mlx5e_select_htb_queue and TX data path. */
+
+       root = mlx5e_sw_node_find(priv, MLX5E_HTB_CLASSID_ROOT);
+       if (!root) {
+               qos_err(priv->mdev, "Failed to find the root node in the QoS tree\n");
+               return -ENOENT;
+       }
+       err = mlx5_qos_destroy_node(priv->mdev, root->hw_id);
+       if (err)
+               qos_err(priv->mdev, "Failed to destroy root node %u, err = %d\n",
+                       root->hw_id, err);
+       mlx5e_sw_node_delete(priv, root);
+
+       mlx5e_qos_deactivate_all_queues(&priv->channels);
+       mlx5e_qos_close_all_queues(&priv->channels);
+
+       return err;
+}
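
These two entry points correspond to installing and removing an offloaded HTB root from user space, e.g. something along the lines of "tc qdisc replace dev eth0 root handle 1: htb offload" to create the root (eth0 standing in for an mlx5e netdev) and "tc qdisc del dev eth0 root" to tear it down; leaf classes added under that root then arrive in mlx5e_htb_leaf_alloc_queue() below.
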
+
+static int mlx5e_htb_convert_rate(struct mlx5e_priv *priv, u64 rate,
+                                 struct mlx5e_qos_node *parent, u32 *bw_share)
+{
+       u64 share = 0;
+
+       while (parent->classid != MLX5E_HTB_CLASSID_ROOT && !parent->max_average_bw)
+               parent = parent->parent;
+
+       if (parent->max_average_bw)
+               share = div64_u64(div_u64(rate * 100, BYTES_IN_MBIT),
+                                 parent->max_average_bw);
+       else
+               share = 101;
+
+       *bw_share = share == 0 ? 1 : share > 100 ? 0 : share;
+
+       qos_dbg(priv->mdev, "Convert: rate %llu, parent ceil %llu -> bw_share %u\n",
+               rate, (u64)parent->max_average_bw * BYTES_IN_MBIT, *bw_share);
+
+       return 0;
+}
+
+static void mlx5e_htb_convert_ceil(struct mlx5e_priv *priv, u64 ceil, u32 *max_average_bw)
+{
+       *max_average_bw = div_u64(ceil, BYTES_IN_MBIT);
+
+       qos_dbg(priv->mdev, "Convert: ceil %llu -> max_average_bw %u\n",
+               ceil, *max_average_bw);
+}
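
A minimal user-space sketch of the conversion arithmetic above (illustrative only, not part of the patch; the helper name htb_bw_share is made up): HTB hands the driver rate and ceil in bytes per second, max_average_bw is that value divided by BYTES_IN_MBIT (i.e. expressed in Mbit/s), and bw_share becomes the rate as a percentage of the nearest ancestor's ceiling, clamped so a zero result is bumped to 1 and anything above 100 (or no ceiling anywhere up the tree) becomes 0.

#include <stdio.h>

#define BYTES_IN_MBIT 125000ULL

/* Mirrors mlx5e_htb_convert_rate()/_ceil() for a single parent level. */
static unsigned int htb_bw_share(unsigned long long rate_bytes_per_sec,
				 unsigned int parent_max_average_bw_mbit)
{
	unsigned long long share;

	if (!parent_max_average_bw_mbit)
		return 0;	/* no ceiling anywhere up the tree */

	share = (rate_bytes_per_sec * 100 / BYTES_IN_MBIT) /
		parent_max_average_bw_mbit;
	return share == 0 ? 1 : share > 100 ? 0 : share;
}

int main(void)
{
	/* parent ceil 200 Mbit/s -> max_average_bw = 25000000 / 125000 = 200 */
	unsigned int parent_bw = 25000000ULL / BYTES_IN_MBIT;

	/* child rate 50 Mbit/s = 6250000 bytes/s -> 25% of the parent ceiling */
	printf("bw_share = %u\n", htb_bw_share(6250000ULL, parent_bw));
	return 0;
}
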
+
+int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid,
+                              u32 parent_classid, u64 rate, u64 ceil,
+                              struct netlink_ext_ack *extack)
+{
+       struct mlx5e_qos_node *node, *parent;
+       int qid;
+       int err;
+
+       qos_dbg(priv->mdev, "TC_HTB_LEAF_ALLOC_QUEUE classid %04x, parent %04x, rate %llu, ceil %llu\n",
+               classid, parent_classid, rate, ceil);
+
+       qid = mlx5e_find_unused_qos_qid(priv);
+       if (qid < 0) {
+               NL_SET_ERR_MSG_MOD(extack, "Maximum amount of leaf classes is reached.");
+               return qid;
+       }
+
+       parent = mlx5e_sw_node_find(priv, parent_classid);
+       if (!parent)
+               return -EINVAL;
+
+       node = mlx5e_sw_node_create_leaf(priv, classid, qid, parent);
+       if (IS_ERR(node))
+               return PTR_ERR(node);
+
+       node->rate = rate;
+       mlx5e_htb_convert_rate(priv, rate, node->parent, &node->bw_share);
+       mlx5e_htb_convert_ceil(priv, ceil, &node->max_average_bw);
+
+       err = mlx5_qos_create_leaf_node(priv->mdev, node->parent->hw_id,
+                                       node->bw_share, node->max_average_bw,
+                                       &node->hw_id);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
+               qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
+                       classid, err);
+               mlx5e_sw_node_delete(priv, node);
+               return err;
+       }
+
+       if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+               err = mlx5e_open_qos_sq(priv, &priv->channels, node);
+               if (err) {
+                       NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+                       qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
+                                classid, err);
+               } else {
+                       mlx5e_activate_qos_sq(priv, node);
+               }
+       }
+
+       return mlx5e_qid_from_qos(&priv->channels, node->qid);
+}
+
+int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid,
+                           u64 rate, u64 ceil, struct netlink_ext_ack *extack)
+{
+       struct mlx5e_qos_node *node, *child;
+       int err, tmp_err;
+       u32 new_hw_id;
+       u16 qid;
+
+       qos_dbg(priv->mdev, "TC_HTB_LEAF_TO_INNER classid %04x, upcoming child %04x, rate %llu, ceil %llu\n",
+               classid, child_classid, rate, ceil);
+
+       node = mlx5e_sw_node_find(priv, classid);
+       if (!node)
+               return -ENOENT;
+
+       err = mlx5_qos_create_inner_node(priv->mdev, node->parent->hw_id,
+                                        node->bw_share, node->max_average_bw,
+                                        &new_hw_id);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating an inner node.");
+               qos_err(priv->mdev, "Failed to create an inner node (class %04x), err = %d\n",
+                       classid, err);
+               return err;
+       }
+
+       /* Intentionally reuse the qid for the upcoming first child. */
+       child = mlx5e_sw_node_create_leaf(priv, child_classid, node->qid, node);
+       if (IS_ERR(child)) {
+               err = PTR_ERR(child);
+               goto err_destroy_hw_node;
+       }
+
+       child->rate = rate;
+       mlx5e_htb_convert_rate(priv, rate, node, &child->bw_share);
+       mlx5e_htb_convert_ceil(priv, ceil, &child->max_average_bw);
+
+       err = mlx5_qos_create_leaf_node(priv->mdev, new_hw_id, child->bw_share,
+                                       child->max_average_bw, &child->hw_id);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
+               qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
+                       classid, err);
+               goto err_delete_sw_node;
+       }
+
+       /* No fail point. */
+
+       qid = node->qid;
+       /* Pairs with mlx5e_get_txq_by_classid. */
+       WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
+
+       if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+               mlx5e_deactivate_qos_sq(priv, qid);
+               mlx5e_close_qos_sq(priv, qid);
+       }
+
+       err = mlx5_qos_destroy_node(priv->mdev, node->hw_id);
+       if (err) /* Not fatal. */
+               qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+                        node->hw_id, classid, err);
+
+       node->hw_id = new_hw_id;
+
+       if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+               err = mlx5e_open_qos_sq(priv, &priv->channels, child);
+               if (err) {
+                       NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+                       qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
+                                classid, err);
+               } else {
+                       mlx5e_activate_qos_sq(priv, child);
+               }
+       }
+
+       return 0;
+
+err_delete_sw_node:
+       child->qid = MLX5E_QOS_QID_INNER;
+       mlx5e_sw_node_delete(priv, child);
+
+err_destroy_hw_node:
+       tmp_err = mlx5_qos_destroy_node(priv->mdev, new_hw_id);
+       if (tmp_err) /* Not fatal. */
+               qos_warn(priv->mdev, "Failed to roll back creation of an inner node %u (class %04x), err = %d\n",
+                        new_hw_id, classid, tmp_err);
+       return err;
+}
+
+static struct mlx5e_qos_node *mlx5e_sw_node_find_by_qid(struct mlx5e_priv *priv, u16 qid)
+{
+       struct mlx5e_qos_node *node = NULL;
+       int bkt;
+
+       hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode)
+               if (node->qid == qid)
+                       break;
+
+       return node;
+}
+
+static void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq)
+{
+       qos_dbg(priv->mdev, "Reactivate QoS SQ qid %u\n", qid);
+       netdev_tx_reset_queue(txq);
+       netif_tx_start_queue(txq);
+}
+
+static void mlx5e_reset_qdisc(struct net_device *dev, u16 qid)
+{
+       struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, qid);
+       struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
+
+       if (!qdisc)
+               return;
+
+       spin_lock_bh(qdisc_lock(qdisc));
+       qdisc_reset(qdisc);
+       spin_unlock_bh(qdisc_lock(qdisc));
+}
+
+int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid,
+                      u16 *new_qid, struct netlink_ext_ack *extack)
+{
+       struct mlx5e_qos_node *node;
+       struct netdev_queue *txq;
+       u16 qid, moved_qid;
+       bool opened;
+       int err;
+
+       qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL classid %04x\n", classid);
+
+       *old_qid = *new_qid = 0;
+
+       node = mlx5e_sw_node_find(priv, classid);
+       if (!node)
+               return -ENOENT;
+
+       /* Store qid for reuse. */
+       qid = node->qid;
+
+       opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+       if (opened) {
+               txq = netdev_get_tx_queue(priv->netdev,
+                                         mlx5e_qid_from_qos(&priv->channels, qid));
+               mlx5e_deactivate_qos_sq(priv, qid);
+               mlx5e_close_qos_sq(priv, qid);
+       }
+
+       err = mlx5_qos_destroy_node(priv->mdev, node->hw_id);
+       if (err) /* Not fatal. */
+               qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+                        node->hw_id, classid, err);
+
+       mlx5e_sw_node_delete(priv, node);
+
+       moved_qid = mlx5e_qos_cur_leaf_nodes(priv);
+
+       if (moved_qid == 0) {
+               /* The last QoS SQ was just destroyed. */
+               if (opened)
+                       mlx5e_reactivate_qos_sq(priv, qid, txq);
+               return 0;
+       }
+       moved_qid--;
+
+       if (moved_qid < qid) {
+               /* The highest QoS SQ was just destroyed. */
+               WARN(moved_qid != qid - 1, "Gaps in queue numbering: destroyed queue %u, the highest queue is %u",
+                    qid, moved_qid);
+               if (opened)
+                       mlx5e_reactivate_qos_sq(priv, qid, txq);
+               return 0;
+       }
+
+       WARN(moved_qid == qid, "Can't move node with qid %u to itself", qid);
+       qos_dbg(priv->mdev, "Moving QoS SQ %u to %u\n", moved_qid, qid);
+
+       node = mlx5e_sw_node_find_by_qid(priv, moved_qid);
+       WARN(!node, "Could not find a node with qid %u to move to queue %u",
+            moved_qid, qid);
+
+       /* Stop traffic to the old queue. */
+       WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
+       __clear_bit(moved_qid, priv->htb.qos_used_qids);
+
+       if (opened) {
+               txq = netdev_get_tx_queue(priv->netdev,
+                                         mlx5e_qid_from_qos(&priv->channels, moved_qid));
+               mlx5e_deactivate_qos_sq(priv, moved_qid);
+               mlx5e_close_qos_sq(priv, moved_qid);
+       }
+
+       /* Prevent packets from the old class from getting into the new one. */
+       mlx5e_reset_qdisc(priv->netdev, moved_qid);
+
+       __set_bit(qid, priv->htb.qos_used_qids);
+       WRITE_ONCE(node->qid, qid);
+
+       if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+               err = mlx5e_open_qos_sq(priv, &priv->channels, node);
+               if (err) {
+                       NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+                       qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x) while moving qid %u to %u, err = %d\n",
+                                node->classid, moved_qid, qid, err);
+               } else {
+                       mlx5e_activate_qos_sq(priv, node);
+               }
+       }
+
+       mlx5e_update_tx_netdev_queues(priv);
+       if (opened)
+               mlx5e_reactivate_qos_sq(priv, moved_qid, txq);
+
+       *old_qid = mlx5e_qid_from_qos(&priv->channels, moved_qid);
+       *new_qid = mlx5e_qid_from_qos(&priv->channels, qid);
+       return 0;
+}
+
+int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force,
+                           struct netlink_ext_ack *extack)
+{
+       struct mlx5e_qos_node *node, *parent;
+       u32 old_hw_id, new_hw_id;
+       int err, saved_err = 0;
+       u16 qid;
+
+       qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL_LAST%s classid %04x\n",
+               force ? "_FORCE" : "", classid);
+
+       node = mlx5e_sw_node_find(priv, classid);
+       if (!node)
+               return -ENOENT;
+
+       err = mlx5_qos_create_leaf_node(priv->mdev, node->parent->parent->hw_id,
+                                       node->parent->bw_share,
+                                       node->parent->max_average_bw,
+                                       &new_hw_id);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
+               qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
+                       classid, err);
+               if (!force)
+                       return err;
+               saved_err = err;
+       }
+
+       /* Store qid for reuse and prevent clearing the bit. */
+       qid = node->qid;
+       /* Pairs with mlx5e_get_txq_by_classid. */
+       WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
+
+       if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+               mlx5e_deactivate_qos_sq(priv, qid);
+               mlx5e_close_qos_sq(priv, qid);
+       }
+
+       /* Prevent packets from the old class from getting into the new one. */
+       mlx5e_reset_qdisc(priv->netdev, qid);
+
+       err = mlx5_qos_destroy_node(priv->mdev, node->hw_id);
+       if (err) /* Not fatal. */
+               qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+                        node->hw_id, classid, err);
+
+       parent = node->parent;
+       mlx5e_sw_node_delete(priv, node);
+
+       node = parent;
+       WRITE_ONCE(node->qid, qid);
+
+       /* Early return on error in force mode. Parent will still be an inner
+        * node to be deleted by a following delete operation.
+        */
+       if (saved_err)
+               return saved_err;
+
+       old_hw_id = node->hw_id;
+       node->hw_id = new_hw_id;
+
+       if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+               err = mlx5e_open_qos_sq(priv, &priv->channels, node);
+               if (err) {
+                       NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+                       qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
+                                classid, err);
+               } else {
+                       mlx5e_activate_qos_sq(priv, node);
+               }
+       }
+
+       err = mlx5_qos_destroy_node(priv->mdev, old_hw_id);
+       if (err) /* Not fatal. */
+               qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+                        node->hw_id, classid, err);
+
+       return 0;
+}
+
+static int mlx5e_qos_update_children(struct mlx5e_priv *priv, struct mlx5e_qos_node *node,
+                                    struct netlink_ext_ack *extack)
+{
+       struct mlx5e_qos_node *child;
+       int err = 0;
+       int bkt;
+
+       hash_for_each(priv->htb.qos_tc2node, bkt, child, hnode) {
+               u32 old_bw_share = child->bw_share;
+               int err_one;
+
+               if (child->parent != node)
+                       continue;
+
+               mlx5e_htb_convert_rate(priv, child->rate, node, &child->bw_share);
+               if (child->bw_share == old_bw_share)
+                       continue;
+
+               err_one = mlx5_qos_update_node(priv->mdev, child->hw_id, child->bw_share,
+                                              child->max_average_bw, child->hw_id);
+               if (!err && err_one) {
+                       err = err_one;
+
+                       NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a child node.");
+                       qos_err(priv->mdev, "Failed to modify a child node (class %04x), err = %d\n",
+                               node->classid, err);
+               }
+       }
+
+       return err;
+}
+
+int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil,
+                         struct netlink_ext_ack *extack)
+{
+       u32 bw_share, max_average_bw;
+       struct mlx5e_qos_node *node;
+       bool ceil_changed = false;
+       int err;
+
+       qos_dbg(priv->mdev, "TC_HTB_LEAF_MODIFY classid %04x, rate %llu, ceil %llu\n",
+               classid, rate, ceil);
+
+       node = mlx5e_sw_node_find(priv, classid);
+       if (!node)
+               return -ENOENT;
+
+       node->rate = rate;
+       mlx5e_htb_convert_rate(priv, rate, node->parent, &bw_share);
+       mlx5e_htb_convert_ceil(priv, ceil, &max_average_bw);
+
+       err = mlx5_qos_update_node(priv->mdev, node->parent->hw_id, bw_share,
+                                  max_average_bw, node->hw_id);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a node.");
+               qos_err(priv->mdev, "Failed to modify a node (class %04x), err = %d\n",
+                       classid, err);
+               return err;
+       }
+
+       if (max_average_bw != node->max_average_bw)
+               ceil_changed = true;
+
+       node->bw_share = bw_share;
+       node->max_average_bw = max_average_bw;
+
+       if (ceil_changed)
+               err = mlx5e_qos_update_children(priv, node, extack);
+
+       return err;
+}
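
The leaf-delete path above keeps the QoS qid space contiguous: when a leaf other than the holder of the highest qid is removed, the node that owns the highest qid is closed, takes over the freed qid, and is reopened, so real_num_tx_queues can simply shrink by one. Below is a minimal sketch of that compaction; the struct and function names are hypothetical, and the SQ open/close and qdisc handling done by the driver are omitted.

/* Illustrative only: qid compaction in the spirit of mlx5e_htb_leaf_del(). */
struct example_leaf {
	unsigned int classid;
	unsigned int qid;
};

/* Remove the leaf holding @del_qid from @leaves[0..count-1]. The leaf that
 * currently owns the highest qid (count - 1) inherits the freed qid, so the
 * used qids stay 0..count-2 with no gaps. Returns the new leaf count.
 */
static unsigned int example_compact_qids(struct example_leaf *leaves,
					 unsigned int count, unsigned int del_qid)
{
	unsigned int moved_qid = count - 1;
	unsigned int i;

	if (del_qid != moved_qid) {
		for (i = 0; i < count; i++) {
			if (leaves[i].qid == moved_qid) {
				leaves[i].qid = del_qid; /* reuse the freed slot */
				break;
			}
		}
	}
	return count - 1;
}
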
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h
new file mode 100644 (file)
index 0000000..5af7991
--- /dev/null
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5E_EN_QOS_H
+#define __MLX5E_EN_QOS_H
+
+#include <linux/mlx5/driver.h>
+
+#define MLX5E_QOS_MAX_LEAF_NODES 256
+
+struct mlx5e_priv;
+struct mlx5e_channels;
+struct mlx5e_channel;
+
+int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev);
+int mlx5e_qos_cur_leaf_nodes(struct mlx5e_priv *priv);
+
+/* TX datapath API */
+int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid);
+struct mlx5e_txqsq *mlx5e_get_sq(struct mlx5e_priv *priv, int qid);
+
+/* SQ lifecycle */
+int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
+void mlx5e_qos_activate_queues(struct mlx5e_priv *priv);
+void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c);
+void mlx5e_qos_close_queues(struct mlx5e_channel *c);
+
+/* HTB API */
+int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls,
+                      struct netlink_ext_ack *extack);
+int mlx5e_htb_root_del(struct mlx5e_priv *priv);
+int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid,
+                              u32 parent_classid, u64 rate, u64 ceil,
+                              struct netlink_ext_ack *extack);
+int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid,
+                           u64 rate, u64 ceil, struct netlink_ext_ack *extack);
+int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid,
+                      u16 *new_qid, struct netlink_ext_ack *extack);
+int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force,
+                           struct netlink_ext_ack *extack);
+int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil,
+                         struct netlink_ext_ack *extack);
+
+#endif
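
The qid returned by mlx5e_htb_leaf_alloc_queue() is a QoS-local index; the QoS SQs are numbered after the nch * ntc regular txqs (and after the PTP txqs when TX port timestamping is enabled), as implied by mlx5e_update_tx_netdev_queues() further below. A hedged sketch of that mapping follows; the helper name is hypothetical and not part of this series.

/* Illustrative only: mapping a QoS qid to a netdev txq index, assuming QoS
 * SQs come after the regular and PTP txqs. Not the driver's actual helper.
 */
static inline int example_qos_qid_to_txq(int nch, int ntc, bool port_ts,
					 int qos_qid)
{
	int regular_txqs = nch * ntc + (port_ts ? ntc : 0);

	return regular_txqs + qos_qid;
}
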
index 2d37742..2e5a069 100644 (file)
@@ -447,6 +447,17 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
                goto out;
        }
 
+       /* Don't allow changing the number of channels if HTB offload is active,
+        * because the numbering of the QoS SQs would change while per-queue
+        * qdiscs are attached.
+        */
+       if (priv->htb.maj_id) {
+               err = -EINVAL;
+               netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the number of channels\n",
+                          __func__);
+               goto out;
+       }
+
        new_channels.params = priv->channels.params;
        new_channels.params.num_channels = count;
 
@@ -1966,6 +1977,16 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable)
        if (!MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn))
                return -EOPNOTSUPP;
 
+       /* Don't allow changing the PTP state if HTB offload is active, because
+        * the numbering of the QoS SQs would change while per-queue qdiscs are
+        * attached.
+        */
+       if (priv->htb.maj_id) {
+               netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the PTP state\n",
+                          __func__);
+               return -EINVAL;
+       }
+
        new_channels.params = priv->channels.params;
        MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_TX_PORT_TS, enable);
        /* No need to verify SQ stop room as
index f33c386..b9a1759 100644 (file)
@@ -65,6 +65,7 @@
 #include "en/devlink.h"
 #include "lib/mlx5.h"
 #include "en/ptp.h"
+#include "qos.h"
 
 bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
 {
@@ -1143,7 +1144,6 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
        sq->uar_map   = mdev->mlx5e_res.bfreg.map;
        sq->min_inline_mode = params->tx_min_inline_mode;
        sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
-       sq->stats     = &c->priv->channel_stats[c->ix].sq[tc];
        INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
        if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
                set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
@@ -1233,6 +1233,7 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
 int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
                    struct mlx5e_modify_sq_param *p)
 {
+       u64 bitmask = 0;
        void *in;
        void *sqc;
        int inlen;
@@ -1248,9 +1249,14 @@ int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
        MLX5_SET(modify_sq_in, in, sq_state, p->curr_state);
        MLX5_SET(sqc, sqc, state, p->next_state);
        if (p->rl_update && p->next_state == MLX5_SQC_STATE_RDY) {
-               MLX5_SET64(modify_sq_in, in, modify_bitmask, 1);
-               MLX5_SET(sqc,  sqc, packet_pacing_rate_limit_index, p->rl_index);
+               bitmask |= 1;
+               MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index);
        }
+       if (p->qos_update && p->next_state == MLX5_SQC_STATE_RDY) {
+               bitmask |= 1 << 2;
+               MLX5_SET(sqc, sqc, qos_queue_group_id, p->qos_queue_group_id);
+       }
+       MLX5_SET64(modify_sq_in, in, modify_bitmask, bitmask);
 
        err = mlx5_core_modify_sq(mdev, sqn, in);
 
@@ -1267,6 +1273,7 @@ static void mlx5e_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn)
 int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev,
                        struct mlx5e_sq_param *param,
                        struct mlx5e_create_sq_param *csp,
+                       u16 qos_queue_group_id,
                        u32 *sqn)
 {
        struct mlx5e_modify_sq_param msp = {0};
@@ -1278,6 +1285,10 @@ int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev,
 
        msp.curr_state = MLX5_SQC_STATE_RST;
        msp.next_state = MLX5_SQC_STATE_RDY;
+       if (qos_queue_group_id) {
+               msp.qos_update = true;
+               msp.qos_queue_group_id = qos_queue_group_id;
+       }
        err = mlx5e_modify_sq(mdev, *sqn, &msp);
        if (err)
                mlx5e_destroy_sq(mdev, *sqn);
@@ -1288,13 +1299,9 @@ int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev,
 static int mlx5e_set_sq_maxrate(struct net_device *dev,
                                struct mlx5e_txqsq *sq, u32 rate);
 
-static int mlx5e_open_txqsq(struct mlx5e_channel *c,
-                           u32 tisn,
-                           int txq_ix,
-                           struct mlx5e_params *params,
-                           struct mlx5e_sq_param *param,
-                           struct mlx5e_txqsq *sq,
-                           int tc)
+int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix,
+                    struct mlx5e_params *params, struct mlx5e_sq_param *param,
+                    struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, u16 qos_qid)
 {
        struct mlx5e_create_sq_param csp = {};
        u32 tx_rate;
@@ -1304,12 +1311,17 @@ static int mlx5e_open_txqsq(struct mlx5e_channel *c,
        if (err)
                return err;
 
+       if (qos_queue_group_id)
+               sq->stats = c->priv->htb.qos_sq_stats[qos_qid];
+       else
+               sq->stats = &c->priv->channel_stats[c->ix].sq[tc];
+
        csp.tisn            = tisn;
        csp.tis_lst_sz      = 1;
        csp.cqn             = sq->cq.mcq.cqn;
        csp.wq_ctrl         = &sq->wq_ctrl;
        csp.min_inline_mode = sq->min_inline_mode;
-       err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
+       err = mlx5e_create_sq_rdy(c->mdev, param, &csp, qos_queue_group_id, &sq->sqn);
        if (err)
                goto err_free_txqsq;
 
@@ -1366,7 +1378,7 @@ void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
        }
 }
 
-static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
+void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
 {
        struct mlx5_core_dev *mdev = sq->mdev;
        struct mlx5_rate_limit rl = {0};
@@ -1403,7 +1415,7 @@ int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
        csp.cqn             = sq->cq.mcq.cqn;
        csp.wq_ctrl         = &sq->wq_ctrl;
        csp.min_inline_mode = params->tx_min_inline_mode;
-       err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
+       err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn);
        if (err)
                goto err_free_icosq;
 
@@ -1452,7 +1464,7 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
        csp.wq_ctrl         = &sq->wq_ctrl;
        csp.min_inline_mode = sq->min_inline_mode;
        set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
-       err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn);
+       err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn);
        if (err)
                goto err_free_xdpsq;
 
@@ -1703,7 +1715,7 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c,
                int txq_ix = c->ix + tc * params->num_channels;
 
                err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix,
-                                      params, &cparam->txq_sq, &c->sq[tc], tc);
+                                      params, &cparam->txq_sq, &c->sq[tc], tc, 0, 0);
                if (err)
                        goto err_close_sqs;
        }
@@ -2044,6 +2056,8 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
        mlx5e_deactivate_icosq(&c->icosq);
        for (tc = 0; tc < c->num_tc; tc++)
                mlx5e_deactivate_txqsq(&c->sq[tc]);
+
+       mlx5e_qos_deactivate_queues(c);
 }
 
 static void mlx5e_close_channel(struct mlx5e_channel *c)
@@ -2051,6 +2065,7 @@ static void mlx5e_close_channel(struct mlx5e_channel *c)
        if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
                mlx5e_close_xsk(c);
        mlx5e_close_queues(c);
+       mlx5e_qos_close_queues(c);
        netif_napi_del(&c->napi);
 
        kvfree(c);
@@ -2198,9 +2213,8 @@ void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
        param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(priv->mdev));
 }
 
-static void mlx5e_build_sq_param(struct mlx5e_priv *priv,
-                                struct mlx5e_params *params,
-                                struct mlx5e_sq_param *param)
+void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params,
+                         struct mlx5e_sq_param *param)
 {
        void *sqc = param->sqc;
        void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
@@ -2379,10 +2393,18 @@ int mlx5e_open_channels(struct mlx5e_priv *priv,
                        goto err_close_channels;
        }
 
+       err = mlx5e_qos_open_queues(priv, chs);
+       if (err)
+               goto err_close_ptp;
+
        mlx5e_health_channels_update(priv);
        kvfree(cparam);
        return 0;
 
+err_close_ptp:
+       if (chs->port_ptp)
+               mlx5e_port_ptp_close(chs->port_ptp);
+
 err_close_channels:
        for (i--; i >= 0; i--)
                mlx5e_close_channel(chs->c[i]);
@@ -2915,11 +2937,31 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc)
                netdev_set_tc_queue(netdev, tc, nch, 0);
 }
 
+int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv)
+{
+       int qos_queues, nch, ntc, num_txqs, err;
+
+       qos_queues = mlx5e_qos_cur_leaf_nodes(priv);
+
+       nch = priv->channels.params.num_channels;
+       ntc = priv->channels.params.num_tc;
+       num_txqs = nch * ntc + qos_queues;
+       if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS))
+               num_txqs += ntc;
+
+       mlx5e_dbg(DRV, priv, "Setting num_txqs %d\n", num_txqs);
+       err = netif_set_real_num_tx_queues(priv->netdev, num_txqs);
+       if (err)
+               netdev_warn(priv->netdev, "netif_set_real_num_tx_queues failed, %d\n", err);
+
+       return err;
+}
+
 static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
 {
        struct net_device *netdev = priv->netdev;
-       int num_txqs, num_rxqs, nch, ntc;
        int old_num_txqs, old_ntc;
+       int num_rxqs, nch, ntc;
        int err;
 
        old_num_txqs = netdev->real_num_tx_queues;
@@ -2927,18 +2969,13 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
 
        nch = priv->channels.params.num_channels;
        ntc = priv->channels.params.num_tc;
-       num_txqs = nch * ntc;
-       if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS))
-               num_txqs += ntc;
        num_rxqs = nch * priv->profile->rq_groups;
 
        mlx5e_netdev_set_tcs(netdev, nch, ntc);
 
-       err = netif_set_real_num_tx_queues(netdev, num_txqs);
-       if (err) {
-               netdev_warn(netdev, "netif_set_real_num_tx_queues failed, %d\n", err);
+       err = mlx5e_update_tx_netdev_queues(priv);
+       if (err)
                goto err_tcs;
-       }
        err = netif_set_real_num_rx_queues(netdev, num_rxqs);
        if (err) {
                netdev_warn(netdev, "netif_set_real_num_rx_queues failed, %d\n", err);
@@ -3042,6 +3079,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
        mlx5e_update_num_tc_x_num_ch(priv);
        mlx5e_build_txq_maps(priv);
        mlx5e_activate_channels(&priv->channels);
+       mlx5e_qos_activate_queues(priv);
        mlx5e_xdp_tx_enable(priv);
        netif_tx_start_all_queues(priv->netdev);
 
@@ -3608,6 +3646,14 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
 
        mutex_lock(&priv->state_lock);
 
+       /* MQPRIO is another top-level qdisc that can't be attached
+        * simultaneously with the offloaded HTB.
+        */
+       if (WARN_ON(priv->htb.maj_id)) {
+               err = -EINVAL;
+               goto out;
+       }
+
        new_channels.params = priv->channels.params;
        new_channels.params.num_tc = tc ? tc : 1;
 
@@ -3628,12 +3674,55 @@ out:
        return err;
 }
 
+static int mlx5e_setup_tc_htb(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb)
+{
+       int res;
+
+       switch (htb->command) {
+       case TC_HTB_CREATE:
+               return mlx5e_htb_root_add(priv, htb->parent_classid, htb->classid,
+                                         htb->extack);
+       case TC_HTB_DESTROY:
+               return mlx5e_htb_root_del(priv);
+       case TC_HTB_LEAF_ALLOC_QUEUE:
+               res = mlx5e_htb_leaf_alloc_queue(priv, htb->classid, htb->parent_classid,
+                                                htb->rate, htb->ceil, htb->extack);
+               if (res < 0)
+                       return res;
+               htb->qid = res;
+               return 0;
+       case TC_HTB_LEAF_TO_INNER:
+               return mlx5e_htb_leaf_to_inner(priv, htb->parent_classid, htb->classid,
+                                              htb->rate, htb->ceil, htb->extack);
+       case TC_HTB_LEAF_DEL:
+               return mlx5e_htb_leaf_del(priv, htb->classid, &htb->moved_qid, &htb->qid,
+                                         htb->extack);
+       case TC_HTB_LEAF_DEL_LAST:
+       case TC_HTB_LEAF_DEL_LAST_FORCE:
+               return mlx5e_htb_leaf_del_last(priv, htb->classid,
+                                              htb->command == TC_HTB_LEAF_DEL_LAST_FORCE,
+                                              htb->extack);
+       case TC_HTB_NODE_MODIFY:
+               return mlx5e_htb_node_modify(priv, htb->classid, htb->rate, htb->ceil,
+                                            htb->extack);
+       case TC_HTB_LEAF_QUERY_QUEUE:
+               res = mlx5e_get_txq_by_classid(priv, htb->classid);
+               if (res < 0)
+                       return res;
+               htb->qid = res;
+               return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
 static LIST_HEAD(mlx5e_block_cb_list);
 
 static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
                          void *type_data)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
+       int err;
 
        switch (type) {
        case TC_SETUP_BLOCK: {
@@ -3647,6 +3736,11 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
        }
        case TC_SETUP_QDISC_MQPRIO:
                return mlx5e_setup_tc_mqprio(priv, type_data);
+       case TC_SETUP_QDISC_HTB:
+               mutex_lock(&priv->state_lock);
+               err = mlx5e_setup_tc_htb(priv, type_data);
+               mutex_unlock(&priv->state_lock);
+               return err;
        default:
                return -EOPNOTSUPP;
        }
@@ -3811,20 +3905,25 @@ static int set_feature_cvlan_filter(struct net_device *netdev, bool enable)
        return 0;
 }
 
-#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
-static int set_feature_tc_num_filters(struct net_device *netdev, bool enable)
+static int set_feature_hw_tc(struct net_device *netdev, bool enable)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
 
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
        if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) {
                netdev_err(netdev,
                           "Active offloaded tc filters, can't turn hw_tc_offload off\n");
                return -EINVAL;
        }
+#endif
+
+       if (!enable && priv->htb.maj_id) {
+               netdev_err(netdev, "Active HTB offload, can't turn hw_tc_offload off\n");
+               return -EINVAL;
+       }
 
        return 0;
 }
-#endif
 
 static int set_feature_rx_all(struct net_device *netdev, bool enable)
 {
@@ -3922,9 +4021,7 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
        err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro);
        err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER,
                                    set_feature_cvlan_filter);
-#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
-       err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_tc_num_filters);
-#endif
+       err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_hw_tc);
        err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all);
        err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs);
        err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan);
@@ -5028,6 +5125,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
                netdev->hw_features      |= NETIF_F_NTUPLE;
 #endif
        }
+       if (mlx5_qos_is_supported(mdev))
+               netdev->features |= NETIF_F_HW_TC;
 
        netdev->features         |= NETIF_F_HIGHDMA;
        netdev->features         |= NETIF_F_HW_VLAN_STAG_FILTER;
@@ -5333,6 +5432,7 @@ int mlx5e_netdev_init(struct net_device *netdev,
                return -ENOMEM;
 
        mutex_init(&priv->state_lock);
+       hash_init(priv->htb.qos_tc2node);
        INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
        INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
        INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work);
@@ -5355,8 +5455,14 @@ err_free_cpumask:
 
 void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv)
 {
+       int i;
+
        destroy_workqueue(priv->wq);
        free_cpumask_var(priv->scratchpad.cpumask);
+
+       for (i = 0; i < priv->htb.max_qos_sqs; i++)
+               kfree(priv->htb.qos_sq_stats[i]);
+       kvfree(priv->htb.qos_sq_stats);
 }
 
 struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
@@ -5366,13 +5472,17 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
 {
        struct net_device *netdev;
        unsigned int ptp_txqs = 0;
+       int qos_sqs = 0;
        int err;
 
        if (MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn))
                ptp_txqs = profile->max_tc;
 
+       if (mlx5_qos_is_supported(mdev))
+               qos_sqs = mlx5e_qos_max_leaf_nodes(mdev);
+
        netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv),
-                                   nch * profile->max_tc + ptp_txqs,
+                                   nch * profile->max_tc + ptp_txqs + qos_sqs,
                                    nch * profile->rq_groups);
        if (!netdev) {
                mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n");
index 2cf2042..92c5b81 100644 (file)
@@ -420,6 +420,25 @@ static void mlx5e_stats_grp_sw_update_stats_ptp(struct mlx5e_priv *priv,
        }
 }
 
+static void mlx5e_stats_grp_sw_update_stats_qos(struct mlx5e_priv *priv,
+                                               struct mlx5e_sw_stats *s)
+{
+       struct mlx5e_sq_stats **stats;
+       u16 max_qos_sqs;
+       int i;
+
+       /* Pairs with smp_store_release in mlx5e_open_qos_sq. */
+       max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs);
+       stats = READ_ONCE(priv->htb.qos_sq_stats);
+
+       for (i = 0; i < max_qos_sqs; i++) {
+               mlx5e_stats_grp_sw_update_stats_sq(s, READ_ONCE(stats[i]));
+
+               /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */
+               barrier();
+       }
+}
+
 static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw)
 {
        struct mlx5e_sw_stats *s = &priv->stats.sw;
@@ -449,6 +468,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw)
                }
        }
        mlx5e_stats_grp_sw_update_stats_ptp(priv, s);
+       mlx5e_stats_grp_sw_update_stats_qos(priv, s);
 }
 
 static const struct counter_desc q_stats_desc[] = {
@@ -1740,6 +1760,41 @@ static const struct counter_desc ptp_cq_stats_desc[] = {
        { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) },
 };
 
+static const struct counter_desc qos_sq_stats_desc[] = {
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, packets) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, bytes) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_packets) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_bytes) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, nop) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) },
+#ifdef CONFIG_MLX5_EN_TLS
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ctx) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ooo) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_resync_bytes) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_skip_no_sync_data) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) },
+#endif
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_none) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, stopped) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, dropped) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, xmit_more) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, recover) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, cqes) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, wake) },
+       { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, cqe_err) },
+};
+
 #define NUM_RQ_STATS                   ARRAY_SIZE(rq_stats_desc)
 #define NUM_SQ_STATS                   ARRAY_SIZE(sq_stats_desc)
 #define NUM_XDPSQ_STATS                        ARRAY_SIZE(xdpsq_stats_desc)
@@ -1750,6 +1805,49 @@ static const struct counter_desc ptp_cq_stats_desc[] = {
 #define NUM_PTP_SQ_STATS               ARRAY_SIZE(ptp_sq_stats_desc)
 #define NUM_PTP_CH_STATS               ARRAY_SIZE(ptp_ch_stats_desc)
 #define NUM_PTP_CQ_STATS               ARRAY_SIZE(ptp_cq_stats_desc)
+#define NUM_QOS_SQ_STATS               ARRAY_SIZE(qos_sq_stats_desc)
+
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qos)
+{
+       /* Pairs with smp_store_release in mlx5e_open_qos_sq. */
+       return NUM_QOS_SQ_STATS * smp_load_acquire(&priv->htb.max_qos_sqs);
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(qos)
+{
+       /* Pairs with smp_store_release in mlx5e_open_qos_sq. */
+       u16 max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs);
+       int i, qid;
+
+       for (qid = 0; qid < max_qos_sqs; qid++)
+               for (i = 0; i < NUM_QOS_SQ_STATS; i++)
+                       sprintf(data + (idx++) * ETH_GSTRING_LEN,
+                               qos_sq_stats_desc[i].format, qid);
+
+       return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qos)
+{
+       struct mlx5e_sq_stats **stats;
+       u16 max_qos_sqs;
+       int i, qid;
+
+       /* Pairs with smp_store_release in mlx5e_open_qos_sq. */
+       max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs);
+       stats = READ_ONCE(priv->htb.qos_sq_stats);
+
+       for (qid = 0; qid < max_qos_sqs; qid++) {
+               struct mlx5e_sq_stats *s = READ_ONCE(stats[qid]);
+
+               for (i = 0; i < NUM_QOS_SQ_STATS; i++)
+                       data[idx++] = MLX5E_READ_CTR64_CPU(s, qos_sq_stats_desc, i);
+       }
+
+       return idx;
+}
+
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qos) { return; }
 
 static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ptp)
 {
@@ -1932,6 +2030,7 @@ MLX5E_DEFINE_STATS_GRP(per_port_buff_congest, 0);
 MLX5E_DEFINE_STATS_GRP(eth_ext, 0);
 static MLX5E_DEFINE_STATS_GRP(tls, 0);
 static MLX5E_DEFINE_STATS_GRP(ptp, 0);
+static MLX5E_DEFINE_STATS_GRP(qos, 0);
 
 /* The stats groups order is opposite to the update_stats() order calls */
 mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = {
@@ -1955,6 +2054,7 @@ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = {
        &MLX5E_STATS_GRP(channels),
        &MLX5E_STATS_GRP(per_port_buff_congest),
        &MLX5E_STATS_GRP(ptp),
+       &MLX5E_STATS_GRP(qos),
 };
 
 unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv)
index e41fc11..93c4131 100644 (file)
@@ -55,6 +55,8 @@
 #define MLX5E_DECLARE_PTP_CH_STAT(type, fld) "ptp_ch_"#fld, offsetof(type, fld)
 #define MLX5E_DECLARE_PTP_CQ_STAT(type, fld) "ptp_cq%d_"#fld, offsetof(type, fld)
 
+#define MLX5E_DECLARE_QOS_TX_STAT(type, fld) "qos_tx%d_"#fld, offsetof(type, fld)
+
 struct counter_desc {
        char            format[ETH_GSTRING_LEN];
        size_t          offset; /* Byte offset */
index 74f233e..da6a358 100644 (file)
@@ -106,28 +106,53 @@ return_txq:
        return priv->port_ptp_tc2realtxq[up];
 }
 
+static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb,
+                                 u16 htb_maj_id)
+{
+       u16 classid;
+
+       if ((TC_H_MAJ(skb->priority) >> 16) == htb_maj_id)
+               classid = TC_H_MIN(skb->priority);
+       else
+               classid = READ_ONCE(priv->htb.defcls);
+
+       if (!classid)
+               return 0;
+
+       return mlx5e_get_txq_by_classid(priv, classid);
+}
+
 u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
                       struct net_device *sb_dev)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
+       int num_tc_x_num_ch;
        int txq_ix;
        int up = 0;
        int ch_ix;
 
-       if (unlikely(priv->channels.port_ptp)) {
-               int num_tc_x_num_ch;
+       /* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */
+       num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch);
+       if (unlikely(dev->real_num_tx_queues > num_tc_x_num_ch)) {
+               /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */
+               u16 htb_maj_id = smp_load_acquire(&priv->htb.maj_id);
 
-               if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
-                   mlx5e_use_ptpsq(skb))
-                       return mlx5e_select_ptpsq(dev, skb);
+               if (unlikely(htb_maj_id)) {
+                       txq_ix = mlx5e_select_htb_queue(priv, skb, htb_maj_id);
+                       if (txq_ix > 0)
+                               return txq_ix;
+               }
 
-               /* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */
-               num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch);
+               if (unlikely(priv->channels.port_ptp))
+                       if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+                           mlx5e_use_ptpsq(skb))
+                               return mlx5e_select_ptpsq(dev, skb);
 
                txq_ix = netdev_pick_tx(dev, skb, NULL);
-               /* Fix netdev_pick_tx() not to choose ptp_channel txqs.
+               /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs.
                 * If they are selected, switch to regular queues.
-                * Driver to select these queues only at mlx5e_select_ptpsq().
+                * Driver to select these queues only at mlx5e_select_ptpsq()
+                * and mlx5e_select_htb_queue().
                 */
                if (unlikely(txq_ix >= num_tc_x_num_ch))
                        txq_ix %= num_tc_x_num_ch;
@@ -702,6 +727,10 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
        u16 pi;
 
        sq = priv->txq2sq[skb_get_queue_mapping(skb)];
+       if (unlikely(!sq)) {
+               dev_kfree_skb_any(skb);
+               return NETDEV_TX_OK;
+       }
 
        /* May send SKBs and WQEs. */
        if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel)))
index a3cfe06..d54da37 100644 (file)
@@ -115,17 +115,21 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
                                               napi);
        struct mlx5e_ch_stats *ch_stats = c->stats;
        struct mlx5e_xdpsq *xsksq = &c->xsksq;
+       struct mlx5e_txqsq __rcu **qos_sqs;
        struct mlx5e_rq *xskrq = &c->xskrq;
        struct mlx5e_rq *rq = &c->rq;
        bool aff_change = false;
        bool busy_xsk = false;
        bool busy = false;
        int work_done = 0;
+       u16 qos_sqs_size;
        bool xsk_open;
        int i;
 
        rcu_read_lock();
 
+       qos_sqs = rcu_dereference(c->qos_sqs);
+
        xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
 
        ch_stats->poll++;
@@ -133,6 +137,18 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
        for (i = 0; i < c->num_tc; i++)
                busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget);
 
+       if (unlikely(qos_sqs)) {
+               smp_rmb(); /* Pairs with mlx5e_qos_alloc_queues. */
+               qos_sqs_size = READ_ONCE(c->qos_sqs_size);
+
+               for (i = 0; i < qos_sqs_size; i++) {
+                       struct mlx5e_txqsq *sq = rcu_dereference(qos_sqs[i]);
+
+                       if (sq)
+                               busy |= mlx5e_poll_tx_cq(&sq->cq, budget);
+               }
+       }
+
        busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq);
 
        if (c->xdp)
@@ -186,6 +202,16 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
                mlx5e_handle_tx_dim(&c->sq[i]);
                mlx5e_cq_arm(&c->sq[i].cq);
        }
+       if (unlikely(qos_sqs)) {
+               for (i = 0; i < qos_sqs_size; i++) {
+                       struct mlx5e_txqsq *sq = rcu_dereference(qos_sqs[i]);
+
+                       if (sq) {
+                               mlx5e_handle_tx_dim(sq);
+                               mlx5e_cq_arm(&sq->cq);
+                       }
+               }
+       }
 
        mlx5e_handle_rx_dim(rq);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/qos.c
new file mode 100644 (file)
index 0000000..0777be2
--- /dev/null
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#include "qos.h"
+
+#define MLX5_QOS_DEFAULT_DWRR_UID 0
+
+bool mlx5_qos_is_supported(struct mlx5_core_dev *mdev)
+{
+       if (!MLX5_CAP_GEN(mdev, qos))
+               return false;
+       if (!MLX5_CAP_QOS(mdev, nic_sq_scheduling))
+               return false;
+       if (!MLX5_CAP_QOS(mdev, nic_bw_share))
+               return false;
+       if (!MLX5_CAP_QOS(mdev, nic_rate_limit))
+               return false;
+       return true;
+}
+
+int mlx5_qos_max_leaf_nodes(struct mlx5_core_dev *mdev)
+{
+       return 1 << MLX5_CAP_QOS(mdev, log_max_qos_nic_queue_group);
+}
+
+int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id,
+                             u32 bw_share, u32 max_avg_bw, u32 *id)
+{
+       u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+
+       MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
+       MLX5_SET(scheduling_context, sched_ctx, element_type,
+                SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP);
+       MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+       MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw);
+
+       return mlx5_create_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC,
+                                                 sched_ctx, id);
+}
+
+int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id,
+                              u32 bw_share, u32 max_avg_bw, u32 *id)
+{
+       u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+       void *attr;
+
+       MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
+       MLX5_SET(scheduling_context, sched_ctx, element_type,
+                SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
+       MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+       MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw);
+
+       attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
+       MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR);
+
+       return mlx5_create_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC,
+                                                 sched_ctx, id);
+}
+
+int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id)
+{
+       return mlx5_qos_create_inner_node(mdev, MLX5_QOS_DEFAULT_DWRR_UID, 0, 0, id);
+}
+
+int mlx5_qos_update_node(struct mlx5_core_dev *mdev, u32 parent_id,
+                        u32 bw_share, u32 max_avg_bw, u32 id)
+{
+       u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+       u32 bitmask = 0;
+
+       MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
+       MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+       MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw);
+
+       bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
+       bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
+
+       return mlx5_modify_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC,
+                                                 sched_ctx, id, bitmask);
+}
+
+int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id)
+{
+       return mlx5_destroy_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, id);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/qos.h
new file mode 100644 (file)
index 0000000..125e4e4
--- /dev/null
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_QOS_H
+#define __MLX5_QOS_H
+
+#include "mlx5_core.h"
+
+#define MLX5_DEBUG_QOS_MASK BIT(4)
+
+#define qos_err(mdev, fmt, ...) \
+       mlx5_core_err(mdev, "QoS: " fmt, ##__VA_ARGS__)
+#define qos_warn(mdev, fmt, ...) \
+       mlx5_core_warn(mdev, "QoS: " fmt, ##__VA_ARGS__)
+#define qos_dbg(mdev, fmt, ...) \
+       mlx5_core_dbg_mask(mdev, MLX5_DEBUG_QOS_MASK, "QoS: " fmt, ##__VA_ARGS__)
+
+bool mlx5_qos_is_supported(struct mlx5_core_dev *mdev);
+int mlx5_qos_max_leaf_nodes(struct mlx5_core_dev *mdev);
+
+int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id,
+                             u32 bw_share, u32 max_avg_bw, u32 *id);
+int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id,
+                              u32 bw_share, u32 max_avg_bw, u32 *id);
+int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id);
+int mlx5_qos_update_node(struct mlx5_core_dev *mdev, u32 parent_id, u32 bw_share,
+                        u32 max_avg_bw, u32 id);
+int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id);
+
+#endif
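
The helpers above map an offloaded HTB hierarchy onto NIC scheduling elements: a DWRR root, DWRR inner nodes (TSARs) for HTB inner classes, and queue-group leaves for HTB leaf classes; SQs attach to a leaf through the qos_queue_group_id field when the SQ is modified to the RDY state (see mlx5e_modify_sq() earlier in this series). A minimal sketch of the call sequence, assuming a valid mdev; the bw_share/max_average_bw values are placeholders and error unwinding is simplified.

/* Illustrative sketch: building a two-level hierarchy with the helpers
 * declared above. Values and error handling are simplified.
 */
static int example_build_qos_tree(struct mlx5_core_dev *mdev)
{
	u32 root_id, inner_id, leaf_id;
	int err;

	err = mlx5_qos_create_root_node(mdev, &root_id);
	if (err)
		return err;

	/* Inner node (DWRR TSAR) under the root, no guarantees or ceiling. */
	err = mlx5_qos_create_inner_node(mdev, root_id, 0, 0, &inner_id);
	if (err)
		goto err_root;

	/* Leaf (queue group) under the inner node; SQs reference leaf_id as
	 * their qos_queue_group_id when moved to the RDY state.
	 */
	err = mlx5_qos_create_leaf_node(mdev, inner_id, 0, 0, &leaf_id);
	if (err)
		goto err_inner;

	return 0;

err_inner:
	mlx5_qos_destroy_node(mdev, inner_id);
err_root:
	mlx5_qos_destroy_node(mdev, root_id);
	return err;
}
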
index 685037e..52fdc34 100644 (file)
@@ -84,6 +84,7 @@ struct mlxsw_core {
        struct mlxsw_thermal *thermal;
        struct mlxsw_core_port *ports;
        unsigned int max_ports;
+       atomic_t active_ports_count;
        bool fw_flash_in_progress;
        struct {
                struct devlink_health_reporter *fw_fatal;
@@ -96,8 +97,36 @@ struct mlxsw_core {
 
 #define MLXSW_PORT_MAX_PORTS_DEFAULT   0x40
 
-static int mlxsw_ports_init(struct mlxsw_core *mlxsw_core)
+static u64 mlxsw_ports_occ_get(void *priv)
 {
+       struct mlxsw_core *mlxsw_core = priv;
+
+       return atomic_read(&mlxsw_core->active_ports_count);
+}
+
+static int mlxsw_core_resources_ports_register(struct mlxsw_core *mlxsw_core)
+{
+       struct devlink *devlink = priv_to_devlink(mlxsw_core);
+       struct devlink_resource_size_params ports_num_params;
+       u32 max_ports;
+
+       max_ports = mlxsw_core->max_ports - 1;
+       devlink_resource_size_params_init(&ports_num_params, max_ports,
+                                         max_ports, 1,
+                                         DEVLINK_RESOURCE_UNIT_ENTRY);
+
+       return devlink_resource_register(devlink,
+                                        DEVLINK_RESOURCE_GENERIC_NAME_PORTS,
+                                        max_ports, MLXSW_CORE_RESOURCE_PORTS,
+                                        DEVLINK_RESOURCE_ID_PARENT_TOP,
+                                        &ports_num_params);
+}
+
+static int mlxsw_ports_init(struct mlxsw_core *mlxsw_core, bool reload)
+{
+       struct devlink *devlink = priv_to_devlink(mlxsw_core);
+       int err;
+
        /* Switch ports are numbered from 1 to queried value */
        if (MLXSW_CORE_RES_VALID(mlxsw_core, MAX_SYSTEM_PORT))
                mlxsw_core->max_ports = MLXSW_CORE_RES_GET(mlxsw_core,
@@ -110,11 +139,30 @@ static int mlxsw_ports_init(struct mlxsw_core *mlxsw_core)
        if (!mlxsw_core->ports)
                return -ENOMEM;
 
+       if (!reload) {
+               err = mlxsw_core_resources_ports_register(mlxsw_core);
+               if (err)
+                       goto err_resources_ports_register;
+       }
+       atomic_set(&mlxsw_core->active_ports_count, 0);
+       devlink_resource_occ_get_register(devlink, MLXSW_CORE_RESOURCE_PORTS,
+                                         mlxsw_ports_occ_get, mlxsw_core);
+
        return 0;
+
+err_resources_ports_register:
+       kfree(mlxsw_core->ports);
+       return err;
 }
 
-static void mlxsw_ports_fini(struct mlxsw_core *mlxsw_core)
+static void mlxsw_ports_fini(struct mlxsw_core *mlxsw_core, bool reload)
 {
+       struct devlink *devlink = priv_to_devlink(mlxsw_core);
+
+       devlink_resource_occ_get_unregister(devlink, MLXSW_CORE_RESOURCE_PORTS);
+       if (!reload)
+               devlink_resources_unregister(priv_to_devlink(mlxsw_core), NULL);
+
        kfree(mlxsw_core->ports);
 }
 
@@ -1897,7 +1945,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
                        goto err_register_resources;
        }
 
-       err = mlxsw_ports_init(mlxsw_core);
+       err = mlxsw_ports_init(mlxsw_core, reload);
        if (err)
                goto err_ports_init;
 
@@ -1986,7 +2034,7 @@ err_devlink_register:
 err_emad_init:
        kfree(mlxsw_core->lag.mapping);
 err_alloc_lag_mapping:
-       mlxsw_ports_fini(mlxsw_core);
+       mlxsw_ports_fini(mlxsw_core, reload);
 err_ports_init:
        if (!reload)
                devlink_resources_unregister(devlink, NULL);
@@ -2056,7 +2104,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
                devlink_unregister(devlink);
        mlxsw_emad_fini(mlxsw_core);
        kfree(mlxsw_core->lag.mapping);
-       mlxsw_ports_fini(mlxsw_core);
+       mlxsw_ports_fini(mlxsw_core, reload);
        if (!reload)
                devlink_resources_unregister(devlink, NULL);
        mlxsw_core->bus->fini(mlxsw_core->bus_priv);
@@ -2755,16 +2803,25 @@ int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port,
                         const unsigned char *switch_id,
                         unsigned char switch_id_len)
 {
-       return __mlxsw_core_port_init(mlxsw_core, local_port,
-                                     DEVLINK_PORT_FLAVOUR_PHYSICAL,
-                                     port_number, split, split_port_subnumber,
-                                     splittable, lanes,
-                                     switch_id, switch_id_len);
+       int err;
+
+       err = __mlxsw_core_port_init(mlxsw_core, local_port,
+                                    DEVLINK_PORT_FLAVOUR_PHYSICAL,
+                                    port_number, split, split_port_subnumber,
+                                    splittable, lanes,
+                                    switch_id, switch_id_len);
+       if (err)
+               return err;
+
+       atomic_inc(&mlxsw_core->active_ports_count);
+       return 0;
 }
 EXPORT_SYMBOL(mlxsw_core_port_init);
 
 void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port)
 {
+       atomic_dec(&mlxsw_core->active_ports_count);
+
        __mlxsw_core_port_fini(mlxsw_core, local_port);
 }
 EXPORT_SYMBOL(mlxsw_core_port_fini);
index 6b3ccbf..8af7d9d 100644 (file)
 #include "cmd.h"
 #include "resources.h"
 
+enum mlxsw_core_resource_id {
+       MLXSW_CORE_RESOURCE_PORTS = 1,
+       MLXSW_CORE_RESOURCE_MAX,
+};
+
 struct mlxsw_core;
 struct mlxsw_core_port;
 struct mlxsw_driver;
index a6956cf..a3769f9 100644 (file)
@@ -52,7 +52,7 @@
 #define MLXSW_SP_RESOURCE_NAME_COUNTERS_RIF "rif"
 
 enum mlxsw_sp_resource_id {
-       MLXSW_SP_RESOURCE_KVD = 1,
+       MLXSW_SP_RESOURCE_KVD = MLXSW_CORE_RESOURCE_MAX,
        MLXSW_SP_RESOURCE_KVD_LINEAR,
        MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
        MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
index fb67d8f..475e6f0 100644 (file)
@@ -4850,10 +4850,8 @@ static void rtl_shutdown(struct pci_dev *pdev)
        rtl_rar_set(tp, tp->dev->perm_addr);
 
        if (system_state == SYSTEM_POWER_OFF) {
-               if (tp->saved_wolopts) {
-                       rtl_wol_enable_rx(tp);
+               if (tp->saved_wolopts)
                        rtl_wol_shutdown_quirk(tp);
-               }
 
                pci_wake_from_d3(pdev, tp->saved_wolopts);
                pci_set_power_state(pdev, PCI_D3hot);
index d067da1..967a634 100644 (file)
@@ -923,7 +923,7 @@ static int ofdpa_flow_tbl_bridge(struct ofdpa_port *ofdpa_port,
        struct ofdpa_flow_tbl_entry *entry;
        u32 priority;
        bool vlan_bridging = !!vlan_id;
-       bool dflt = !eth_dst || (eth_dst && eth_dst_mask);
+       bool dflt = !eth_dst || eth_dst_mask;
        bool wild = false;
 
        entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
index a4a626e..1bfeee2 100644 (file)
@@ -17,6 +17,7 @@
 #include "rx_common.h"
 #include "nic.h"
 #include "sriov.h"
+#include "workarounds.h"
 
 /* This is the first interrupt mode to try out of:
  * 0 => MSI-X
@@ -137,6 +138,7 @@ static int efx_allocate_msix_channels(struct efx_nic *efx,
 {
        unsigned int n_channels = parallelism;
        int vec_count;
+       int tx_per_ev;
        int n_xdp_tx;
        int n_xdp_ev;
 
@@ -149,9 +151,9 @@ static int efx_allocate_msix_channels(struct efx_nic *efx,
         * multiple tx queues, assuming tx and ev queues are both
         * maximum size.
         */
-
+       tx_per_ev = EFX_MAX_EVQ_SIZE / EFX_TXQ_MAX_ENT(efx);
        n_xdp_tx = num_possible_cpus();
-       n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_MAX_TXQ_PER_CHANNEL);
+       n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, tx_per_ev);
 
        vec_count = pci_msix_vec_count(efx->pci_dev);
        if (vec_count < 0)
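
The sfc change above derives how many XDP TX queues one event queue can service from the actual ring sizes instead of the fixed EFX_MAX_TXQ_PER_CHANNEL constant. A rough, self-contained model of the arithmetic follows; the constants are assumptions for illustration only, just the calculation mirrors the patch.

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int max_evq_entries = 4096;	/* assumed EFX_MAX_EVQ_SIZE */
	unsigned int txq_entries = 1024;	/* assumed per-TXQ ring size */
	unsigned int n_xdp_tx = 24;		/* e.g. num_possible_cpus() */

	/* how many maximum-size TX queues one event queue can service */
	unsigned int tx_per_ev = max_evq_entries / txq_entries;
	unsigned int n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, tx_per_ev);

	printf("tx_per_ev=%u -> %u XDP event queues for %u TX queues\n",
	       tx_per_ev, n_xdp_ev, n_xdp_tx);
	return 0;
}
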
index 5b29f7d..f79cf3c 100644 (file)
@@ -272,7 +272,7 @@ static void gsi_irq_ch_ctrl_disable(struct gsi *gsi)
        iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
 }
 
-static void gsi_irq_ieob_enable(struct gsi *gsi, u32 evt_ring_id)
+static void gsi_irq_ieob_enable_one(struct gsi *gsi, u32 evt_ring_id)
 {
        bool enable_ieob = !gsi->ieob_enabled_bitmap;
        u32 val;
@@ -286,11 +286,11 @@ static void gsi_irq_ieob_enable(struct gsi *gsi, u32 evt_ring_id)
                gsi_irq_type_enable(gsi, GSI_IEOB);
 }
 
-static void gsi_irq_ieob_disable(struct gsi *gsi, u32 evt_ring_id)
+static void gsi_irq_ieob_disable(struct gsi *gsi, u32 event_mask)
 {
        u32 val;
 
-       gsi->ieob_enabled_bitmap &= ~BIT(evt_ring_id);
+       gsi->ieob_enabled_bitmap &= ~event_mask;
 
        /* Disable the interrupt type if this was the last enabled channel */
        if (!gsi->ieob_enabled_bitmap)
@@ -300,6 +300,11 @@ static void gsi_irq_ieob_disable(struct gsi *gsi, u32 evt_ring_id)
        iowrite32(val, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET);
 }
 
+static void gsi_irq_ieob_disable_one(struct gsi *gsi, u32 evt_ring_id)
+{
+       gsi_irq_ieob_disable(gsi, BIT(evt_ring_id));
+}
+
 /* Enable all GSI_interrupt types */
 static void gsi_irq_enable(struct gsi *gsi)
 {
@@ -766,13 +771,13 @@ static void gsi_channel_freeze(struct gsi_channel *channel)
 
        napi_disable(&channel->napi);
 
-       gsi_irq_ieob_disable(channel->gsi, channel->evt_ring_id);
+       gsi_irq_ieob_disable_one(channel->gsi, channel->evt_ring_id);
 }
 
 /* Allow transactions to be used on the channel again. */
 static void gsi_channel_thaw(struct gsi_channel *channel)
 {
-       gsi_irq_ieob_enable(channel->gsi, channel->evt_ring_id);
+       gsi_irq_ieob_enable_one(channel->gsi, channel->evt_ring_id);
 
        napi_enable(&channel->napi);
 }
@@ -1200,6 +1205,7 @@ static void gsi_isr_ieob(struct gsi *gsi)
        u32 event_mask;
 
        event_mask = ioread32(gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_OFFSET);
+       gsi_irq_ieob_disable(gsi, event_mask);
        iowrite32(event_mask, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_CLR_OFFSET);
 
        while (event_mask) {
@@ -1207,7 +1213,6 @@ static void gsi_isr_ieob(struct gsi *gsi)
 
                event_mask ^= BIT(evt_ring_id);
 
-               gsi_irq_ieob_disable(gsi, evt_ring_id);
                napi_schedule(&gsi->evt_ring[evt_ring_id].channel->napi);
        }
 }
@@ -1452,7 +1457,7 @@ void gsi_channel_doorbell(struct gsi_channel *channel)
 }
 
 /* Consult hardware, move any newly completed transactions to completed list */
-static void gsi_channel_update(struct gsi_channel *channel)
+static struct gsi_trans *gsi_channel_update(struct gsi_channel *channel)
 {
        u32 evt_ring_id = channel->evt_ring_id;
        struct gsi *gsi = channel->gsi;
@@ -1471,7 +1476,7 @@ static void gsi_channel_update(struct gsi_channel *channel)
        offset = GSI_EV_CH_E_CNTXT_4_OFFSET(evt_ring_id);
        index = gsi_ring_index(ring, ioread32(gsi->virt + offset));
        if (index == ring->index % ring->count)
-               return;
+               return NULL;
 
        /* Get the transaction for the latest completed event.  Take a
         * reference to keep it from completing before we give the events
@@ -1496,6 +1501,8 @@ static void gsi_channel_update(struct gsi_channel *channel)
        gsi_evt_ring_doorbell(channel->gsi, channel->evt_ring_id, index);
 
        gsi_trans_free(trans);
+
+       return gsi_channel_trans_complete(channel);
 }
 
 /**
@@ -1516,11 +1523,8 @@ static struct gsi_trans *gsi_channel_poll_one(struct gsi_channel *channel)
 
        /* Get the first transaction from the completed list */
        trans = gsi_channel_trans_complete(channel);
-       if (!trans) {
-               /* List is empty; see if there's more to do */
-               gsi_channel_update(channel);
-               trans = gsi_channel_trans_complete(channel);
-       }
+       if (!trans)     /* List is empty; see if there's more to do */
+               trans = gsi_channel_update(channel);
 
        if (trans)
                gsi_trans_move_polled(trans);
@@ -1543,23 +1547,20 @@ static struct gsi_trans *gsi_channel_poll_one(struct gsi_channel *channel)
 static int gsi_channel_poll(struct napi_struct *napi, int budget)
 {
        struct gsi_channel *channel;
-       int count = 0;
+       int count;
 
        channel = container_of(napi, struct gsi_channel, napi);
-       while (count < budget) {
+       for (count = 0; count < budget; count++) {
                struct gsi_trans *trans;
 
-               count++;
                trans = gsi_channel_poll_one(channel);
                if (!trans)
                        break;
                gsi_trans_complete(trans);
        }
 
-       if (count < budget) {
-               napi_complete(&channel->napi);
-               gsi_irq_ieob_enable(channel->gsi, channel->evt_ring_id);
-       }
+       if (count < budget && napi_complete(napi))
+               gsi_irq_ieob_enable_one(channel->gsi, channel->evt_ring_id);
 
        return count;
 }
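
The GSI rework above reshapes the NAPI poll into a plain for loop bounded by the budget and only re-arms the IEOB interrupt when the loop stopped early (in the kernel version additionally gated on napi_complete() succeeding), with the ISR now masking whole event groups at once. A simplified userspace model of that loop shape, with illustrative names:

#include <stdbool.h>
#include <stdio.h>

static int pending = 7;			/* pretend 7 completions are queued */

static bool poll_one(void)
{
	if (!pending)
		return false;
	pending--;
	return true;
}

static int channel_poll(int budget)
{
	int count;

	for (count = 0; count < budget; count++)
		if (!poll_one())
			break;

	if (count < budget)		/* ring drained: safe to re-arm IRQ */
		printf("re-enable IEOB interrupt\n");

	return count;
}

int main(void)
{
	printf("first pass handled %d\n", channel_poll(4));   /* budget hit */
	printf("second pass handled %d\n", channel_poll(4));  /* drained */
	return 0;
}
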
index 6c23710..c6c6a7f 100644 (file)
@@ -43,7 +43,6 @@ enum ipa_flag {
  * @flags:             Boolean state flags
  * @version:           IPA hardware version
  * @pdev:              Platform device
- * @modem_rproc:       Remoteproc handle for modem subsystem
  * @smp2p:             SMP2P information
  * @clock:             IPA clocking information
  * @table_addr:                DMA address of filter/route table content
@@ -83,7 +82,6 @@ struct ipa {
        DECLARE_BITMAP(flags, IPA_FLAG_COUNT);
        enum ipa_version version;
        struct platform_device *pdev;
-       struct rproc *modem_rproc;
        struct notifier_block nb;
        void *notifier;
        struct ipa_smp2p *smp2p;
index 84bb8ae..ab0fd5c 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/of_address.h>
-#include <linux/remoteproc.h>
 #include <linux/qcom_scm.h>
 #include <linux/soc/qcom/mdt_loader.h>
 
@@ -729,19 +728,6 @@ static const struct of_device_id ipa_match[] = {
 };
 MODULE_DEVICE_TABLE(of, ipa_match);
 
-static phandle of_property_read_phandle(const struct device_node *np,
-                                       const char *name)
-{
-        struct property *prop;
-        int len = 0;
-
-        prop = of_find_property(np, name, &len);
-        if (!prop || len != sizeof(__be32))
-                return 0;
-
-        return be32_to_cpup(prop->value);
-}
-
 /* Check things that can be validated at build time.  This just
  * groups these things BUILD_BUG_ON() calls don't clutter the rest
  * of the code.
@@ -807,10 +793,8 @@ static int ipa_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        const struct ipa_data *data;
        struct ipa_clock *clock;
-       struct rproc *rproc;
        bool modem_init;
        struct ipa *ipa;
-       phandle ph;
        int ret;
 
        ipa_validate_build();
@@ -829,25 +813,12 @@ static int ipa_probe(struct platform_device *pdev)
                if (!qcom_scm_is_available())
                        return -EPROBE_DEFER;
 
-       /* We rely on remoteproc to tell us about modem state changes */
-       ph = of_property_read_phandle(dev->of_node, "modem-remoteproc");
-       if (!ph) {
-               dev_err(dev, "DT missing \"modem-remoteproc\" property\n");
-               return -EINVAL;
-       }
-
-       rproc = rproc_get_by_phandle(ph);
-       if (!rproc)
-               return -EPROBE_DEFER;
-
        /* The clock and interconnects might not be ready when we're
         * probed, so might return -EPROBE_DEFER.
         */
        clock = ipa_clock_init(dev, data->clock_data);
-       if (IS_ERR(clock)) {
-               ret = PTR_ERR(clock);
-               goto err_rproc_put;
-       }
+       if (IS_ERR(clock))
+               return PTR_ERR(clock);
 
        /* No more EPROBE_DEFER.  Allocate and initialize the IPA structure */
        ipa = kzalloc(sizeof(*ipa), GFP_KERNEL);
@@ -858,7 +829,6 @@ static int ipa_probe(struct platform_device *pdev)
 
        ipa->pdev = pdev;
        dev_set_drvdata(dev, ipa);
-       ipa->modem_rproc = rproc;
        ipa->clock = clock;
        ipa->version = data->version;
 
@@ -935,8 +905,6 @@ err_kfree_ipa:
        kfree(ipa);
 err_clock_exit:
        ipa_clock_exit(clock);
-err_rproc_put:
-       rproc_put(rproc);
 
        return ret;
 }
@@ -944,7 +912,6 @@ err_rproc_put:
 static int ipa_remove(struct platform_device *pdev)
 {
        struct ipa *ipa = dev_get_drvdata(&pdev->dev);
-       struct rproc *rproc = ipa->modem_rproc;
        struct ipa_clock *clock = ipa->clock;
        int ret;
 
@@ -970,7 +937,6 @@ static int ipa_remove(struct platform_device *pdev)
        ipa_reg_exit(ipa);
        kfree(ipa);
        ipa_clock_exit(clock);
-       rproc_put(rproc);
 
        return 0;
 }
index 5f3a4cc..a5a214d 100644 (file)
@@ -248,6 +248,10 @@ static void mhi_net_rx_refill_work(struct work_struct *work)
                schedule_delayed_work(&mhi_netdev->rx_refill, HZ / 2);
 }
 
+static struct device_type wwan_type = {
+       .name = "wwan",
+};
+
 static int mhi_net_probe(struct mhi_device *mhi_dev,
                         const struct mhi_device_id *id)
 {
@@ -267,6 +271,7 @@ static int mhi_net_probe(struct mhi_device *mhi_dev,
        mhi_netdev->ndev = ndev;
        mhi_netdev->mdev = mhi_dev;
        SET_NETDEV_DEV(ndev, &mhi_dev->dev);
+       SET_NETDEV_DEVTYPE(ndev, &wwan_type);
 
        /* All MHI net channels have 128 ring elements (at least for now) */
        mhi_netdev->rx_queue_sz = 128;
index 99ecd6c..821e85a 100644 (file)
@@ -60,6 +60,9 @@
 #define RTL_LPADV_5000FULL                     BIT(6)
 #define RTL_LPADV_2500FULL                     BIT(5)
 
+#define RTL9000A_GINMR                         0x14
+#define RTL9000A_GINMR_LINK_STATUS             BIT(4)
+
 #define RTLGEN_SPEED_MASK                      0x0630
 
 #define RTL_GENERIC_PHYID                      0x001cc800
@@ -655,6 +658,122 @@ static int rtlgen_resume(struct phy_device *phydev)
        return ret;
 }
 
+static int rtl9000a_config_init(struct phy_device *phydev)
+{
+       phydev->autoneg = AUTONEG_DISABLE;
+       phydev->speed = SPEED_100;
+       phydev->duplex = DUPLEX_FULL;
+
+       return 0;
+}
+
+static int rtl9000a_config_aneg(struct phy_device *phydev)
+{
+       int ret;
+       u16 ctl = 0;
+
+       switch (phydev->master_slave_set) {
+       case MASTER_SLAVE_CFG_MASTER_FORCE:
+               ctl |= CTL1000_AS_MASTER;
+               break;
+       case MASTER_SLAVE_CFG_SLAVE_FORCE:
+               break;
+       case MASTER_SLAVE_CFG_UNKNOWN:
+       case MASTER_SLAVE_CFG_UNSUPPORTED:
+               return 0;
+       default:
+               phydev_warn(phydev, "Unsupported Master/Slave mode\n");
+               return -EOPNOTSUPP;
+       }
+
+       ret = phy_modify_changed(phydev, MII_CTRL1000, CTL1000_AS_MASTER, ctl);
+       if (ret == 1)
+               ret = genphy_soft_reset(phydev);
+
+       return ret;
+}
+
+static int rtl9000a_read_status(struct phy_device *phydev)
+{
+       int ret;
+
+       phydev->master_slave_get = MASTER_SLAVE_CFG_UNKNOWN;
+       phydev->master_slave_state = MASTER_SLAVE_STATE_UNKNOWN;
+
+       ret = genphy_update_link(phydev);
+       if (ret)
+               return ret;
+
+       ret = phy_read(phydev, MII_CTRL1000);
+       if (ret < 0)
+               return ret;
+       if (ret & CTL1000_AS_MASTER)
+               phydev->master_slave_get = MASTER_SLAVE_CFG_MASTER_FORCE;
+       else
+               phydev->master_slave_get = MASTER_SLAVE_CFG_SLAVE_FORCE;
+
+       ret = phy_read(phydev, MII_STAT1000);
+       if (ret < 0)
+               return ret;
+       if (ret & LPA_1000MSRES)
+               phydev->master_slave_state = MASTER_SLAVE_STATE_MASTER;
+       else
+               phydev->master_slave_state = MASTER_SLAVE_STATE_SLAVE;
+
+       return 0;
+}
+
+static int rtl9000a_ack_interrupt(struct phy_device *phydev)
+{
+       int err;
+
+       err = phy_read(phydev, RTL8211F_INSR);
+
+       return (err < 0) ? err : 0;
+}
+
+static int rtl9000a_config_intr(struct phy_device *phydev)
+{
+       u16 val;
+       int err;
+
+       if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+               err = rtl9000a_ack_interrupt(phydev);
+               if (err)
+                       return err;
+
+               val = (u16)~RTL9000A_GINMR_LINK_STATUS;
+               err = phy_write_paged(phydev, 0xa42, RTL9000A_GINMR, val);
+       } else {
+               val = ~0;
+               err = phy_write_paged(phydev, 0xa42, RTL9000A_GINMR, val);
+               if (err)
+                       return err;
+
+               err = rtl9000a_ack_interrupt(phydev);
+       }
+
+       return err;
+}
+
+static irqreturn_t rtl9000a_handle_interrupt(struct phy_device *phydev)
+{
+       int irq_status;
+
+       irq_status = phy_read(phydev, RTL8211F_INSR);
+       if (irq_status < 0) {
+               phy_error(phydev);
+               return IRQ_NONE;
+       }
+
+       if (!(irq_status & RTL8211F_INER_LINK_STATUS))
+               return IRQ_NONE;
+
+       phy_trigger_machine(phydev);
+
+       return IRQ_HANDLED;
+}
+
 static struct phy_driver realtek_drvs[] = {
        {
                PHY_ID_MATCH_EXACT(0x00008201),
@@ -823,6 +942,19 @@ static struct phy_driver realtek_drvs[] = {
                .handle_interrupt = genphy_handle_interrupt_no_ack,
                .suspend        = genphy_suspend,
                .resume         = genphy_resume,
+       }, {
+               PHY_ID_MATCH_EXACT(0x001ccb00),
+               .name           = "RTL9000AA_RTL9000AN Ethernet",
+               .features       = PHY_BASIC_T1_FEATURES,
+               .config_init    = rtl9000a_config_init,
+               .config_aneg    = rtl9000a_config_aneg,
+               .read_status    = rtl9000a_read_status,
+               .config_intr    = rtl9000a_config_intr,
+               .handle_interrupt = rtl9000a_handle_interrupt,
+               .suspend        = genphy_suspend,
+               .resume         = genphy_resume,
+               .read_page      = rtl821x_read_page,
+               .write_page     = rtl821x_write_page,
        },
 };
 
index 1447da1..b4c8080 100644 (file)
@@ -1539,11 +1539,11 @@ static void usbnet_bh (struct timer_list *t)
        }
 }
 
-static void usbnet_bh_tasklet(unsigned long data)
+static void usbnet_bh_tasklet(struct tasklet_struct *t)
 {
-       struct timer_list *t = (struct timer_list *)data;
+       struct usbnet *dev = from_tasklet(dev, t, bh);
 
-       usbnet_bh(t);
+       usbnet_bh(&dev->delay);
 }
 
 
@@ -1673,8 +1673,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
        skb_queue_head_init (&dev->txq);
        skb_queue_head_init (&dev->done);
        skb_queue_head_init(&dev->rxq_pause);
-       dev->bh.func = usbnet_bh_tasklet;
-       dev->bh.data = (unsigned long)&dev->delay;
+       tasklet_setup(&dev->bh, usbnet_bh_tasklet);
        INIT_WORK (&dev->kevent, usbnet_deferred_kevent);
        init_usb_anchor(&dev->deferred);
        timer_setup(&dev->delay, usbnet_bh, 0);
@@ -1964,12 +1963,12 @@ static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
                              cmd, reqtype, value, index, buf, size,
                              USB_CTRL_GET_TIMEOUT);
        if (err > 0 && err <= size) {
-        if (data)
-            memcpy(data, buf, err);
-        else
-            netdev_dbg(dev->net,
-                "Huh? Data requested but thrown away.\n");
-    }
+               if (data)
+                       memcpy(data, buf, err);
+               else
+                       netdev_dbg(dev->net,
+                                  "Huh? Data requested but thrown away.\n");
+       }
        kfree(buf);
 out:
        return err;
index ad0abb1..adaa1a7 100644 (file)
@@ -155,7 +155,7 @@ static int fdp_nci_i2c_read(struct fdp_i2c_phy *phy, struct sk_buff **skb)
 
                /*
                 * LRC check failed. This may due to transmission error or
-                * desynchronization between driver and FDP. Drop the paquet
+                * desynchronization between driver and FDP. Drop the packet
                 * and force resynchronization
                 */
                if (lrc) {
index c70f62f..3397802 100644 (file)
 
 /* Bits determining whether its a direct command or register R/W,
  * whether to use a continuous SPI transaction or not, and the actual
- * direct cmd opcode or regster address.
+ * direct cmd opcode or register address.
  */
 #define TRF7970A_CMD_BIT_CTRL                  BIT(7)
 #define TRF7970A_CMD_BIT_RW                    BIT(6)
index 497a7e0..654e924 100644 (file)
@@ -27,7 +27,7 @@
 
 static struct gen_pool *muram_pool;
 static spinlock_t cpm_muram_lock;
-static u8 __iomem *muram_vbase;
+static void __iomem *muram_vbase;
 static phys_addr_t muram_pbase;
 
 struct muram_block {
@@ -223,9 +223,9 @@ void __iomem *cpm_muram_addr(unsigned long offset)
 }
 EXPORT_SYMBOL(cpm_muram_addr);
 
-unsigned long cpm_muram_offset(void __iomem *addr)
+unsigned long cpm_muram_offset(const void __iomem *addr)
 {
-       return addr - (void __iomem *)muram_vbase;
+       return addr - muram_vbase;
 }
 EXPORT_SYMBOL(cpm_muram_offset);
 
@@ -235,6 +235,18 @@ EXPORT_SYMBOL(cpm_muram_offset);
  */
 dma_addr_t cpm_muram_dma(void __iomem *addr)
 {
-       return muram_pbase + ((u8 __iomem *)addr - muram_vbase);
+       return muram_pbase + (addr - muram_vbase);
 }
 EXPORT_SYMBOL(cpm_muram_dma);
+
+/*
+ * As cpm_muram_free, but takes the virtual address rather than the
+ * muram offset.
+ */
+void cpm_muram_free_addr(const void __iomem *addr)
+{
+       if (!addr)
+               return;
+       cpm_muram_free(cpm_muram_offset(addr));
+}
+EXPORT_SYMBOL(cpm_muram_free_addr);
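
cpm_muram_free_addr() above is a convenience for callers that only hold the mapped virtual address, and it is deliberately a no-op for NULL. A small sketch of that NULL-tolerant address-to-offset wrapper pattern, with userspace stand-ins rather than the real API:

#include <stdio.h>

static char muram[64];			/* pretend this is the mapped MURAM */

static unsigned long addr_to_offset(const char *addr)
{
	return (unsigned long)(addr - muram);
}

static void free_by_offset(unsigned long offset)
{
	printf("freeing block at offset %lu\n", offset);
}

/* free by virtual address, tolerating NULL like cpm_muram_free_addr() */
static void free_by_addr(const char *addr)
{
	if (!addr)
		return;
	free_by_offset(addr_to_offset(addr));
}

int main(void)
{
	free_by_addr(&muram[16]);
	free_by_addr(NULL);		/* silently ignored */
	return 0;
}
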
index 7faf6a3..ac4d83a 100644 (file)
@@ -123,6 +123,7 @@ void unregister_candev(struct net_device *dev);
 int can_restart_now(struct net_device *dev);
 void can_bus_off(struct net_device *dev);
 
+const char *can_get_state_str(const enum can_state state);
 void can_change_state(struct net_device *dev, struct can_frame *cf,
                      enum can_state tx_state, enum can_state rx_state);
 
index dda61d1..9d1f29f 100644 (file)
@@ -31,6 +31,7 @@ struct ipv6_devconf {
        __s32           max_desync_factor;
        __s32           max_addresses;
        __s32           accept_ra_defrtr;
+       __u32           ra_defrtr_metric;
        __s32           accept_ra_min_hop_limit;
        __s32           accept_ra_pinfo;
        __s32           ignore_routes_with_linkdown;
index 823411e..71ae6aa 100644 (file)
@@ -842,11 +842,16 @@ struct mlx5_ifc_qos_cap_bits {
        u8         reserved_at_4[0x1];
        u8         packet_pacing_burst_bound[0x1];
        u8         packet_pacing_typical_size[0x1];
-       u8         reserved_at_7[0x4];
+       u8         reserved_at_7[0x1];
+       u8         nic_sq_scheduling[0x1];
+       u8         nic_bw_share[0x1];
+       u8         nic_rate_limit[0x1];
        u8         packet_pacing_uid[0x1];
        u8         reserved_at_c[0x14];
 
-       u8         reserved_at_20[0x20];
+       u8         reserved_at_20[0xb];
+       u8         log_max_qos_nic_queue_group[0x5];
+       u8         reserved_at_30[0x10];
 
        u8         packet_pacing_max_rate[0x20];
 
@@ -3347,7 +3352,7 @@ struct mlx5_ifc_sqc_bits {
        u8         reserved_at_e0[0x10];
        u8         packet_pacing_rate_limit_index[0x10];
        u8         tis_lst_sz[0x10];
-       u8         reserved_at_110[0x10];
+       u8         qos_queue_group_id[0x10];
 
        u8         reserved_at_120[0x40];
 
@@ -3362,6 +3367,7 @@ enum {
        SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT = 0x1,
        SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC = 0x2,
        SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3,
+       SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP = 0x4,
 };
 
 enum {
@@ -4805,6 +4811,7 @@ struct mlx5_ifc_query_scheduling_element_out_bits {
 
 enum {
        SCHEDULING_HIERARCHY_E_SWITCH = 0x2,
+       SCHEDULING_HIERARCHY_NIC = 0x3,
 };
 
 struct mlx5_ifc_query_scheduling_element_in_bits {
index 934de56..c06d6aa 100644 (file)
@@ -84,6 +84,7 @@ enum {
        NETIF_F_GRO_FRAGLIST_BIT,       /* Fraglist GRO */
 
        NETIF_F_HW_MACSEC_BIT,          /* Offload MACsec operations */
+       NETIF_F_GRO_UDP_FWD_BIT,        /* Allow UDP GRO for forwarding */
 
        /*
         * Add your fresh new feature above and remember to update
@@ -157,6 +158,7 @@ enum {
 #define NETIF_F_GRO_FRAGLIST   __NETIF_F(GRO_FRAGLIST)
 #define NETIF_F_GSO_FRAGLIST   __NETIF_F(GSO_FRAGLIST)
 #define NETIF_F_HW_MACSEC      __NETIF_F(HW_MACSEC)
+#define NETIF_F_GRO_UDP_FWD    __NETIF_F(GRO_UDP_FWD)
 
 /* Finds the next feature with the highest number of the range of start till 0.
  */
@@ -234,7 +236,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start)
 #define NETIF_F_SOFT_FEATURES  (NETIF_F_GSO | NETIF_F_GRO)
 
 /* Changeable features with no special hardware requirements that defaults to off. */
-#define NETIF_F_SOFT_FEATURES_OFF      NETIF_F_GRO_FRAGLIST
+#define NETIF_F_SOFT_FEATURES_OFF      (NETIF_F_GRO_FRAGLIST | NETIF_F_GRO_UDP_FWD)
 
 #define NETIF_F_VLAN_FEATURES  (NETIF_F_HW_VLAN_CTAG_FILTER | \
                                 NETIF_F_HW_VLAN_CTAG_RX | \
index ef51725..9e85725 100644 (file)
@@ -858,6 +858,7 @@ enum tc_setup_type {
        TC_SETUP_QDISC_ETS,
        TC_SETUP_QDISC_TBF,
        TC_SETUP_QDISC_FIFO,
+       TC_SETUP_QDISC_HTB,
 };
 
 /* These structures hold the attributes of bpf state that are being passed
index 186dad2..9313b5a 100644 (file)
@@ -3859,7 +3859,7 @@ static inline bool skb_defer_rx_timestamp(struct sk_buff *skb)
 void skb_complete_tx_timestamp(struct sk_buff *skb,
                               struct skb_shared_hwtstamps *hwtstamps);
 
-void __skb_tstamp_tx(struct sk_buff *orig_skb,
+void __skb_tstamp_tx(struct sk_buff *orig_skb, const struct sk_buff *ack_skb,
                     struct skb_shared_hwtstamps *hwtstamps,
                     struct sock *sk, int tstype);
 
index 2f87377..48d8a36 100644 (file)
@@ -496,7 +496,8 @@ static inline u32 tcp_saved_syn_len(const struct saved_syn *saved_syn)
 }
 
 struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
-                                              const struct sk_buff *orig_skb);
+                                              const struct sk_buff *orig_skb,
+                                              const struct sk_buff *ack_skb);
 
 static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss)
 {
index f466819..d12ed28 100644 (file)
@@ -380,6 +380,8 @@ struct devlink_resource {
 
 #define DEVLINK_RESOURCE_ID_PARENT_TOP 0
 
+#define DEVLINK_RESOURCE_GENERIC_NAME_PORTS "physical_ports"
+
 #define __DEVLINK_PARAM_MAX_STRING_VALUE 32
 enum devlink_param_type {
        DEVLINK_PARAM_TYPE_U8,
index 111d777..c11f80f 100644 (file)
@@ -141,7 +141,6 @@ struct inet_connection_sock {
 #define ICSK_TIME_RETRANS      1       /* Retransmit timer */
 #define ICSK_TIME_DACK         2       /* Delayed ack timer */
 #define ICSK_TIME_PROBE0       3       /* Zero window probe timer */
-#define ICSK_TIME_EARLY_RETRANS 4      /* Early retransmit timer */
 #define ICSK_TIME_LOSS_PROBE   5       /* Tail loss probe timer */
 #define ICSK_TIME_REO_TIMEOUT  6       /* Reordering timer */
 
@@ -227,8 +226,7 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
        }
 
        if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 ||
-           what == ICSK_TIME_EARLY_RETRANS || what == ICSK_TIME_LOSS_PROBE ||
-           what == ICSK_TIME_REO_TIMEOUT) {
+           what == ICSK_TIME_LOSS_PROBE || what == ICSK_TIME_REO_TIMEOUT) {
                icsk->icsk_pending = what;
                icsk->icsk_timeout = jiffies + when;
                sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
index 2a52777..f51a118 100644 (file)
@@ -174,7 +174,8 @@ struct fib6_info *rt6_get_dflt_router(struct net *net,
                                     struct net_device *dev);
 struct fib6_info *rt6_add_dflt_router(struct net *net,
                                     const struct in6_addr *gwaddr,
-                                    struct net_device *dev, unsigned int pref);
+                                    struct net_device *dev, unsigned int pref,
+                                    u32 defrtr_usr_metric);
 
 void rt6_purge_dflt_routers(struct net *net);
 
index 0f2a9c4..255e4f4 100644 (file)
@@ -783,6 +783,42 @@ struct tc_mq_qopt_offload {
        };
 };
 
+enum tc_htb_command {
+       /* Root */
+       TC_HTB_CREATE, /* Initialize HTB offload. */
+       TC_HTB_DESTROY, /* Destroy HTB offload. */
+
+       /* Classes */
+       /* Allocate qid and create leaf. */
+       TC_HTB_LEAF_ALLOC_QUEUE,
+       /* Convert leaf to inner, preserve and return qid, create new leaf. */
+       TC_HTB_LEAF_TO_INNER,
+       /* Delete leaf, while siblings remain. */
+       TC_HTB_LEAF_DEL,
+       /* Delete leaf, convert parent to leaf, preserving qid. */
+       TC_HTB_LEAF_DEL_LAST,
+       /* TC_HTB_LEAF_DEL_LAST, but delete driver data on hardware errors. */
+       TC_HTB_LEAF_DEL_LAST_FORCE,
+       /* Modify parameters of a node. */
+       TC_HTB_NODE_MODIFY,
+
+       /* Class qdisc */
+       TC_HTB_LEAF_QUERY_QUEUE, /* Query qid by classid. */
+};
+
+struct tc_htb_qopt_offload {
+       struct netlink_ext_ack *extack;
+       enum tc_htb_command command;
+       u16 classid;
+       u32 parent_classid;
+       u16 qid;
+       u16 moved_qid;
+       u64 rate;
+       u64 ceil;
+};
+
+#define TC_HTB_CLASSID_ROOT U32_MAX
+
 enum tc_red_command {
        TC_RED_REPLACE,
        TC_RED_DESTROY,
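
The new TC_SETUP_QDISC_HTB type and struct tc_htb_qopt_offload above let a driver receive HTB hierarchy changes as discrete commands. The sketch below shows the general shape a command dispatcher might take; the types are re-declared locally with made-up names, so this is an illustration, not a real ndo_setup_tc implementation.

#include <stdint.h>
#include <stdio.h>

enum htb_cmd_model {
	HTB_MODEL_CREATE,
	HTB_MODEL_DESTROY,
	HTB_MODEL_LEAF_ALLOC_QUEUE,
};

struct htb_offload_model {
	enum htb_cmd_model command;
	uint32_t classid;
	uint32_t parent_classid;
	uint64_t rate;
	uint64_t ceil;
};

static int setup_htb_model(const struct htb_offload_model *opt)
{
	switch (opt->command) {
	case HTB_MODEL_CREATE:
		printf("enable HTB offload at the root\n");
		return 0;
	case HTB_MODEL_LEAF_ALLOC_QUEUE:
		printf("leaf %#x under %#x: rate=%llu ceil=%llu\n",
		       (unsigned int)opt->classid,
		       (unsigned int)opt->parent_classid,
		       (unsigned long long)opt->rate,
		       (unsigned long long)opt->ceil);
		return 0;
	case HTB_MODEL_DESTROY:
		printf("tear down HTB offload\n");
		return 0;
	}
	return -1;	/* commands this sketch does not model */
}

int main(void)
{
	struct htb_offload_model leaf = {
		.command = HTB_MODEL_LEAF_ALLOC_QUEUE,
		.classid = 0x10001, .parent_classid = 0x10000,
		.rate = 10000000, .ceil = 20000000,
	};

	setup_htb_model(&(struct htb_offload_model){ .command = HTB_MODEL_CREATE });
	setup_htb_model(&leaf);
	return 0;
}
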
index e7bee99..070f01b 100644 (file)
@@ -210,7 +210,8 @@ struct Qdisc_class_ops {
        int                     (*change)(struct Qdisc *, u32, u32,
                                        struct nlattr **, unsigned long *,
                                        struct netlink_ext_ack *);
-       int                     (*delete)(struct Qdisc *, unsigned long);
+       int                     (*delete)(struct Qdisc *, unsigned long,
+                                         struct netlink_ext_ack *);
        void                    (*walk)(struct Qdisc *, struct qdisc_walker * arg);
 
        /* Filter manipulation */
@@ -552,14 +553,20 @@ static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc)
        return qdisc->dev_queue->dev;
 }
 
-static inline void sch_tree_lock(const struct Qdisc *q)
+static inline void sch_tree_lock(struct Qdisc *q)
 {
-       spin_lock_bh(qdisc_root_sleeping_lock(q));
+       if (q->flags & TCQ_F_MQROOT)
+               spin_lock_bh(qdisc_lock(q));
+       else
+               spin_lock_bh(qdisc_root_sleeping_lock(q));
 }
 
-static inline void sch_tree_unlock(const struct Qdisc *q)
+static inline void sch_tree_unlock(struct Qdisc *q)
 {
-       spin_unlock_bh(qdisc_root_sleeping_lock(q));
+       if (q->flags & TCQ_F_MQROOT)
+               spin_unlock_bh(qdisc_lock(q));
+       else
+               spin_unlock_bh(qdisc_root_sleeping_lock(q));
 }
 
 extern struct Qdisc noop_qdisc;
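
sch_tree_lock()/sch_tree_unlock() above now take the qdisc's own lock when the qdisc is an MQ root instead of always taking the root sleeping lock. A toy model of picking a lock by flag, using pthread mutexes purely for illustration and invented names:

#include <pthread.h>
#include <stdio.h>

#define FLAG_MQROOT	0x1

static pthread_mutex_t root_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t own_lock = PTHREAD_MUTEX_INITIALIZER;

struct qdisc_model {
	unsigned int flags;
	pthread_mutex_t *qdisc_lock;		/* this qdisc's own lock */
	pthread_mutex_t *root_sleeping_lock;	/* shared hierarchy lock */
};

/* pick the lock a tree change must run under, by flag */
static pthread_mutex_t *tree_lock_of(const struct qdisc_model *q)
{
	return (q->flags & FLAG_MQROOT) ? q->qdisc_lock : q->root_sleeping_lock;
}

int main(void)
{
	struct qdisc_model q = {
		.flags = FLAG_MQROOT,
		.qdisc_lock = &own_lock,
		.root_sleeping_lock = &root_lock,
	};

	pthread_mutex_lock(tree_lock_of(&q));
	printf("tree change ran under the %s lock\n",
	       (q.flags & FLAG_MQROOT) ? "qdisc's own" : "root");
	pthread_mutex_unlock(tree_lock_of(&q));
	return 0;
}
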
index 3feddfe..4925a1b 100644 (file)
 #define QE_NUM_OF_BRGS 16
 #define QE_NUM_OF_PORTS        1024
 
-/* Memory partitions
-*/
-#define MEM_PART_SYSTEM                0
-#define MEM_PART_SECONDARY     1
-#define MEM_PART_MURAM         2
-
 /* Clocks and BRGs */
 enum qe_clock {
        QE_CLK_NONE = 0,
@@ -102,8 +96,9 @@ s32 cpm_muram_alloc(unsigned long size, unsigned long align);
 void cpm_muram_free(s32 offset);
 s32 cpm_muram_alloc_fixed(unsigned long offset, unsigned long size);
 void __iomem *cpm_muram_addr(unsigned long offset);
-unsigned long cpm_muram_offset(void __iomem *addr);
+unsigned long cpm_muram_offset(const void __iomem *addr);
 dma_addr_t cpm_muram_dma(void __iomem *addr);
+void cpm_muram_free_addr(const void __iomem *addr);
 #else
 static inline s32 cpm_muram_alloc(unsigned long size,
                                  unsigned long align)
@@ -126,7 +121,7 @@ static inline void __iomem *cpm_muram_addr(unsigned long offset)
        return NULL;
 }
 
-static inline unsigned long cpm_muram_offset(void __iomem *addr)
+static inline unsigned long cpm_muram_offset(const void __iomem *addr)
 {
        return -ENOSYS;
 }
@@ -135,6 +130,9 @@ static inline dma_addr_t cpm_muram_dma(void __iomem *addr)
 {
        return 0;
 }
+static inline void cpm_muram_free_addr(const void __iomem *addr)
+{
+}
 #endif /* defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE) */
 
 /* QE PIO */
@@ -239,6 +237,7 @@ static inline int qe_alive_during_sleep(void)
 #define qe_muram_addr cpm_muram_addr
 #define qe_muram_offset cpm_muram_offset
 #define qe_muram_dma cpm_muram_dma
+#define qe_muram_free_addr cpm_muram_free_addr
 
 #ifdef CONFIG_PPC32
 #define qe_iowrite8(val, addr)     out_8(addr, val)
index dc4e794..9696a5b 100644 (file)
@@ -146,7 +146,6 @@ struct ucc_fast_info {
        resource_size_t regs;
        int irq;
        u32 uccm_mask;
-       int bd_mem_part;
        int brkpt_support;
        int grant_support;
        int tsa;
index 2bd0d8b..eb8018c 100644 (file)
@@ -525,6 +525,8 @@ enum {
        IFLA_BRPORT_BACKUP_PORT,
        IFLA_BRPORT_MRP_RING_OPEN,
        IFLA_BRPORT_MRP_IN_OPEN,
+       IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT,
+       IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
        __IFLA_BRPORT_MAX
 };
 #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
index 13e8751..7060377 100644 (file)
@@ -189,6 +189,7 @@ enum {
        DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN,
        DEVCONF_NDISC_TCLASS,
        DEVCONF_RPL_SEG_ENABLED,
+       DEVCONF_RA_DEFRTR_METRIC,
        DEVCONF_MAX
 };
 
index 9e7c2c6..79a699f 100644 (file)
@@ -434,6 +434,7 @@ enum {
        TCA_HTB_RATE64,
        TCA_HTB_CEIL64,
        TCA_HTB_PAD,
+       TCA_HTB_OFFLOAD,
        __TCA_HTB_MAX,
 };
 
index 458179d..1e05d3c 100644 (file)
@@ -571,6 +571,7 @@ enum {
        NET_IPV6_ACCEPT_SOURCE_ROUTE=25,
        NET_IPV6_ACCEPT_RA_FROM_LOCAL=26,
        NET_IPV6_ACCEPT_RA_RT_INFO_MIN_PLEN=27,
+       NET_IPV6_RA_DEFRTR_METRIC=28,
        __NET_IPV6_MAX
 };
 
index 768e93b..42fc5a6 100644 (file)
@@ -314,6 +314,7 @@ enum {
        TCP_NLA_TIMEOUT_REHASH, /* Timeout-triggered rehash attempts */
        TCP_NLA_BYTES_NOTSENT,  /* Bytes in write queue not yet sent */
        TCP_NLA_EDT,            /* Earliest departure time (CLOCK_MONOTONIC) */
+       TCP_NLA_TTL,            /* TTL or hop limit of a packet received */
 };
 
 /* for TCP_MD5SIG socket option */
@@ -353,5 +354,9 @@ struct tcp_zerocopy_receive {
        __u64 copybuf_address;  /* in: copybuf address (small reads) */
        __s32 copybuf_len; /* in/out: copybuf bytes avail/used or error */
        __u32 flags; /* in: flags */
+       __u64 msg_control; /* ancillary data */
+       __u64 msg_controllen;
+       __u32 msg_flags;
+       /* __u32 hole;  Next we must add >1 u32 otherwise length checks fail. */
 };
 #endif /* _UAPI_LINUX_TCP_H */
index 3d11fec..64468c4 100644 (file)
@@ -4,7 +4,6 @@
 #
 
 menuconfig NET_9P
-       depends on NET
        tristate "Plan 9 Resource Sharing Support (9P2000)"
        help
          If you say Y here, you will get experimental support for
index d96b0aa..9ca9572 100644 (file)
@@ -6,20 +6,19 @@
 # Rewritten to use lists instead of if-statements.
 #
 
-obj-$(CONFIG_NET)              := devres.o socket.o core/
+obj-y                          := devres.o socket.o core/
 
-tmp-$(CONFIG_COMPAT)           := compat.o
-obj-$(CONFIG_NET)              += $(tmp-y)
+obj-$(CONFIG_COMPAT)           += compat.o
 
 # LLC has to be linked before the files in net/802/
 obj-$(CONFIG_LLC)              += llc/
-obj-$(CONFIG_NET)              += ethernet/ 802/ sched/ netlink/ bpf/ ethtool/
+obj-y                          += ethernet/ 802/ sched/ netlink/ bpf/ ethtool/
 obj-$(CONFIG_NETFILTER)                += netfilter/
 obj-$(CONFIG_INET)             += ipv4/
 obj-$(CONFIG_TLS)              += tls/
 obj-$(CONFIG_XFRM)             += xfrm/
 obj-$(CONFIG_UNIX_SCM)         += unix/
-obj-$(CONFIG_NET)              += ipv6/
+obj-y                          += ipv6/
 obj-$(CONFIG_BPFILTER)         += bpfilter/
 obj-$(CONFIG_PACKET)           += packet/
 obj-$(CONFIG_NET_KEY)          += key/
@@ -56,16 +55,12 @@ obj-$(CONFIG_SMC)           += smc/
 obj-$(CONFIG_RFKILL)           += rfkill/
 obj-$(CONFIG_NET_9P)           += 9p/
 obj-$(CONFIG_CAIF)             += caif/
-ifneq ($(CONFIG_DCB),)
-obj-y                          += dcb/
-endif
+obj-$(CONFIG_DCB)              += dcb/
 obj-$(CONFIG_6LOWPAN)          += 6lowpan/
 obj-$(CONFIG_IEEE802154)       += ieee802154/
 obj-$(CONFIG_MAC802154)                += mac802154/
 
-ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_SYSCTL)           += sysctl_net.o
-endif
 obj-$(CONFIG_DNS_RESOLVER)     += dns_resolver/
 obj-$(CONFIG_CEPH_LIB)         += ceph/
 obj-$(CONFIG_BATMAN_ADV)       += batman-adv/
@@ -77,12 +72,8 @@ obj-$(CONFIG_VSOCKETS)       += vmw_vsock/
 obj-$(CONFIG_MPLS)             += mpls/
 obj-$(CONFIG_NET_NSH)          += nsh/
 obj-$(CONFIG_HSR)              += hsr/
-ifneq ($(CONFIG_NET_SWITCHDEV),)
-obj-y                          += switchdev/
-endif
-ifneq ($(CONFIG_NET_L3_MASTER_DEV),)
-obj-y                          += l3mdev/
-endif
+obj-$(CONFIG_NET_SWITCHDEV)    += switchdev/
+obj-$(CONFIG_NET_L3_MASTER_DEV)        += l3mdev/
 obj-$(CONFIG_QRTR)             += qrtr/
 obj-$(CONFIG_NET_NCSI)         += ncsi/
 obj-$(CONFIG_XDP_SOCKETS)      += xdp/
index 993afd5..43ae3dc 100644 (file)
@@ -9,7 +9,6 @@
 
 config BATMAN_ADV
        tristate "B.A.T.M.A.N. Advanced Meshing Protocol"
-       depends on NET
        select LIBCRC32C
        help
          B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is
index 64e669a..400c513 100644 (file)
@@ -5,7 +5,7 @@
 
 menuconfig BT
        tristate "Bluetooth subsystem support"
-       depends on NET && !S390
+       depends on !S390
        depends on RFKILL || !RFKILL
        select CRC16
        select CRYPTO
index 8ad0233..3d4a214 100644 (file)
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 menuconfig BPFILTER
        bool "BPF based packet filtering framework (BPFILTER)"
-       depends on NET && BPF && INET
+       depends on BPF && INET
        select USERMODE_DRIVER
        help
          This builds experimental bpfilter framework that is aiming to
index 4702702..7fb9a02 100644 (file)
@@ -18,7 +18,7 @@ br_netfilter-y := br_netfilter_hooks.o
 br_netfilter-$(subst m,y,$(CONFIG_IPV6)) += br_netfilter_ipv6.o
 obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o
 
-bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o
+bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o br_multicast_eht.o
 
 bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o br_vlan_tunnel.o br_vlan_options.o
 
index 257ac4e..6f672eb 100644 (file)
@@ -33,6 +33,7 @@
 #endif
 
 #include "br_private.h"
+#include "br_private_mcast_eht.h"
 
 static const struct rhashtable_params br_mdb_rht_params = {
        .head_offset = offsetof(struct net_bridge_mdb_entry, rhnode),
@@ -441,7 +442,8 @@ static void br_multicast_fwd_src_add(struct net_bridge_group_src *src)
        br_multicast_sg_add_exclude_ports(star_mp, sg);
 }
 
-static void br_multicast_fwd_src_remove(struct net_bridge_group_src *src)
+static void br_multicast_fwd_src_remove(struct net_bridge_group_src *src,
+                                       bool fastleave)
 {
        struct net_bridge_port_group *p, *pg = src->pg;
        struct net_bridge_port_group __rcu **pp;
@@ -466,6 +468,8 @@ static void br_multicast_fwd_src_remove(struct net_bridge_group_src *src)
                    (p->flags & MDB_PG_FLAGS_PERMANENT))
                        break;
 
+               if (fastleave)
+                       p->flags |= MDB_PG_FLAGS_FAST_LEAVE;
                br_multicast_del_pg(mp, p, pp);
                break;
        }
@@ -559,11 +563,12 @@ static void br_multicast_destroy_group_src(struct net_bridge_mcast_gc *gc)
        kfree_rcu(src, rcu);
 }
 
-static void br_multicast_del_group_src(struct net_bridge_group_src *src)
+void br_multicast_del_group_src(struct net_bridge_group_src *src,
+                               bool fastleave)
 {
        struct net_bridge *br = src->pg->key.port->br;
 
-       br_multicast_fwd_src_remove(src);
+       br_multicast_fwd_src_remove(src, fastleave);
        hlist_del_init_rcu(&src->node);
        src->pg->src_ents--;
        hlist_add_head(&src->mcast_gc.gc_node, &br->mcast_gc_list);
@@ -593,8 +598,9 @@ void br_multicast_del_pg(struct net_bridge_mdb_entry *mp,
 
        rcu_assign_pointer(*pp, pg->next);
        hlist_del_init(&pg->mglist);
+       br_multicast_eht_clean_sets(pg);
        hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node)
-               br_multicast_del_group_src(ent);
+               br_multicast_del_group_src(ent, false);
        br_mdb_notify(br->dev, mp, pg, RTM_DELMDB);
        if (!br_multicast_is_star_g(&mp->addr)) {
                rhashtable_remove_fast(&br->sg_port_tbl, &pg->rhnode,
@@ -651,7 +657,7 @@ static void br_multicast_port_group_expired(struct timer_list *t)
        pg->filter_mode = MCAST_INCLUDE;
        hlist_for_each_entry_safe(src_ent, tmp, &pg->src_list, node) {
                if (!timer_pending(&src_ent->timer)) {
-                       br_multicast_del_group_src(src_ent);
+                       br_multicast_del_group_src(src_ent, false);
                        changed = true;
                }
        }
@@ -1078,7 +1084,7 @@ static void br_multicast_group_src_expired(struct timer_list *t)
 
        pg = src->pg;
        if (pg->filter_mode == MCAST_INCLUDE) {
-               br_multicast_del_group_src(src);
+               br_multicast_del_group_src(src, false);
                if (!hlist_empty(&pg->src_list))
                        goto out;
                br_multicast_find_del_pg(br, pg);
@@ -1090,7 +1096,7 @@ out:
        spin_unlock(&br->multicast_lock);
 }
 
-static struct net_bridge_group_src *
+struct net_bridge_group_src *
 br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip)
 {
        struct net_bridge_group_src *ent;
@@ -1172,6 +1178,8 @@ struct net_bridge_port_group *br_multicast_new_port_group(
        p->flags = flags;
        p->filter_mode = filter_mode;
        p->rt_protocol = rt_protocol;
+       p->eht_host_tree = RB_ROOT;
+       p->eht_set_tree = RB_ROOT;
        p->mcast_gc.destroy = br_multicast_destroy_port_group;
        INIT_HLIST_HEAD(&p->src_list);
 
@@ -1292,7 +1300,7 @@ static int br_multicast_add_group(struct net_bridge *br,
        pg = __br_multicast_add_group(br, port, group, src, filter_mode,
                                      igmpv2_mldv1, false);
        /* NULL is considered valid for host joined groups */
-       err = IS_ERR(pg) ? PTR_ERR(pg) : 0;
+       err = PTR_ERR_OR_ZERO(pg);
        spin_unlock(&br->multicast_lock);
 
        return err;
@@ -1600,6 +1608,7 @@ static void br_mc_disabled_update(struct net_device *dev, bool value)
 int br_multicast_add_port(struct net_bridge_port *port)
 {
        port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
+       port->multicast_eht_hosts_limit = BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT;
 
        timer_setup(&port->multicast_router_timer,
                    br_multicast_router_expired, 0);
@@ -1700,7 +1709,7 @@ static int __grp_src_delete_marked(struct net_bridge_port_group *pg)
 
        hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node)
                if (ent->flags & BR_SGRP_F_DELETE) {
-                       br_multicast_del_group_src(ent);
+                       br_multicast_del_group_src(ent, false);
                        deleted++;
                }
 
@@ -1799,8 +1808,9 @@ static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg)
  * INCLUDE (A)    ALLOW (B)     INCLUDE (A+B)            (B)=GMI
  * EXCLUDE (X,Y)  ALLOW (A)     EXCLUDE (X+A,Y-A)        (A)=GMI
  */
-static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg,
-                                    void *srcs, u32 nsrcs, size_t src_size)
+static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_addr,
+                                    void *srcs, u32 nsrcs, size_t addr_size,
+                                    int grec_type)
 {
        struct net_bridge *br = pg->key.port->br;
        struct net_bridge_group_src *ent;
@@ -1812,7 +1822,7 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg,
        memset(&src_ip, 0, sizeof(src_ip));
        src_ip.proto = pg->key.addr.proto;
        for (src_idx = 0; src_idx < nsrcs; src_idx++) {
-               memcpy(&src_ip.src, srcs, src_size);
+               memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size);
                ent = br_multicast_find_group_src(pg, &src_ip);
                if (!ent) {
                        ent = br_multicast_new_group_src(pg, &src_ip);
@@ -1822,9 +1832,11 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg,
 
                if (ent)
                        __grp_src_mod_timer(ent, now + br_multicast_gmi(br));
-               srcs += src_size;
        }
 
+       if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+               changed = true;
+
        return changed;
 }
 
@@ -1833,8 +1845,9 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg,
  *                                                       Delete (A-B)
  *                                                       Group Timer=GMI
  */
-static void __grp_src_isexc_incl(struct net_bridge_port_group *pg,
-                                void *srcs, u32 nsrcs, size_t src_size)
+static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr,
+                                void *srcs, u32 nsrcs, size_t addr_size,
+                                int grec_type)
 {
        struct net_bridge_group_src *ent;
        struct br_ip src_ip;
@@ -1846,7 +1859,7 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg,
        memset(&src_ip, 0, sizeof(src_ip));
        src_ip.proto = pg->key.addr.proto;
        for (src_idx = 0; src_idx < nsrcs; src_idx++) {
-               memcpy(&src_ip.src, srcs, src_size);
+               memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size);
                ent = br_multicast_find_group_src(pg, &src_ip);
                if (ent)
                        ent->flags &= ~BR_SGRP_F_DELETE;
@@ -1854,9 +1867,10 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg,
                        ent = br_multicast_new_group_src(pg, &src_ip);
                if (ent)
                        br_multicast_fwd_src_handle(ent);
-               srcs += src_size;
        }
 
+       br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type);
+
        __grp_src_delete_marked(pg);
 }
 
@@ -1866,8 +1880,9 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg,
  *                                                       Delete (Y-A)
  *                                                       Group Timer=GMI
  */
-static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg,
-                                void *srcs, u32 nsrcs, size_t src_size)
+static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr,
+                                void *srcs, u32 nsrcs, size_t addr_size,
+                                int grec_type)
 {
        struct net_bridge *br = pg->key.port->br;
        struct net_bridge_group_src *ent;
@@ -1882,7 +1897,7 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg,
        memset(&src_ip, 0, sizeof(src_ip));
        src_ip.proto = pg->key.addr.proto;
        for (src_idx = 0; src_idx < nsrcs; src_idx++) {
-               memcpy(&src_ip.src, srcs, src_size);
+               memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size);
                ent = br_multicast_find_group_src(pg, &src_ip);
                if (ent) {
                        ent->flags &= ~BR_SGRP_F_DELETE;
@@ -1894,29 +1909,34 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg,
                                changed = true;
                        }
                }
-               srcs += src_size;
        }
 
+       if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+               changed = true;
+
        if (__grp_src_delete_marked(pg))
                changed = true;
 
        return changed;
 }
 
-static bool br_multicast_isexc(struct net_bridge_port_group *pg,
-                              void *srcs, u32 nsrcs, size_t src_size)
+static bool br_multicast_isexc(struct net_bridge_port_group *pg, void *h_addr,
+                              void *srcs, u32 nsrcs, size_t addr_size,
+                              int grec_type)
 {
        struct net_bridge *br = pg->key.port->br;
        bool changed = false;
 
        switch (pg->filter_mode) {
        case MCAST_INCLUDE:
-               __grp_src_isexc_incl(pg, srcs, nsrcs, src_size);
+               __grp_src_isexc_incl(pg, h_addr, srcs, nsrcs, addr_size,
+                                    grec_type);
                br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE);
                changed = true;
                break;
        case MCAST_EXCLUDE:
-               changed = __grp_src_isexc_excl(pg, srcs, nsrcs, src_size);
+               changed = __grp_src_isexc_excl(pg, h_addr, srcs, nsrcs, addr_size,
+                                              grec_type);
                break;
        }
 
@@ -1930,8 +1950,9 @@ static bool br_multicast_isexc(struct net_bridge_port_group *pg,
  * INCLUDE (A)    TO_IN (B)     INCLUDE (A+B)            (B)=GMI
  *                                                       Send Q(G,A-B)
  */
-static bool __grp_src_toin_incl(struct net_bridge_port_group *pg,
-                               void *srcs, u32 nsrcs, size_t src_size)
+static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr,
+                               void *srcs, u32 nsrcs, size_t addr_size,
+                               int grec_type)
 {
        struct net_bridge *br = pg->key.port->br;
        u32 src_idx, to_send = pg->src_ents;
@@ -1946,7 +1967,7 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg,
        memset(&src_ip, 0, sizeof(src_ip));
        src_ip.proto = pg->key.addr.proto;
        for (src_idx = 0; src_idx < nsrcs; src_idx++) {
-               memcpy(&src_ip.src, srcs, src_size);
+               memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size);
                ent = br_multicast_find_group_src(pg, &src_ip);
                if (ent) {
                        ent->flags &= ~BR_SGRP_F_SEND;
@@ -1958,9 +1979,11 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg,
                }
                if (ent)
                        __grp_src_mod_timer(ent, now + br_multicast_gmi(br));
-               srcs += src_size;
        }
 
+       if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+               changed = true;
+
        if (to_send)
                __grp_src_query_marked_and_rexmit(pg);
 
@@ -1972,8 +1995,9 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg,
  *                                                       Send Q(G,X-A)
  *                                                       Send Q(G)
  */
-static bool __grp_src_toin_excl(struct net_bridge_port_group *pg,
-                               void *srcs, u32 nsrcs, size_t src_size)
+static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, void *h_addr,
+                               void *srcs, u32 nsrcs, size_t addr_size,
+                               int grec_type)
 {
        struct net_bridge *br = pg->key.port->br;
        u32 src_idx, to_send = pg->src_ents;
@@ -1989,7 +2013,7 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg,
        memset(&src_ip, 0, sizeof(src_ip));
        src_ip.proto = pg->key.addr.proto;
        for (src_idx = 0; src_idx < nsrcs; src_idx++) {
-               memcpy(&src_ip.src, srcs, src_size);
+               memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size);
                ent = br_multicast_find_group_src(pg, &src_ip);
                if (ent) {
                        if (timer_pending(&ent->timer)) {
@@ -2003,9 +2027,11 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg,
                }
                if (ent)
                        __grp_src_mod_timer(ent, now + br_multicast_gmi(br));
-               srcs += src_size;
        }
 
+       if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+               changed = true;
+
        if (to_send)
                __grp_src_query_marked_and_rexmit(pg);
 
@@ -2014,20 +2040,32 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg,
        return changed;
 }
 
-static bool br_multicast_toin(struct net_bridge_port_group *pg,
-                             void *srcs, u32 nsrcs, size_t src_size)
+static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr,
+                             void *srcs, u32 nsrcs, size_t addr_size,
+                             int grec_type)
 {
        bool changed = false;
 
        switch (pg->filter_mode) {
        case MCAST_INCLUDE:
-               changed = __grp_src_toin_incl(pg, srcs, nsrcs, src_size);
+               changed = __grp_src_toin_incl(pg, h_addr, srcs, nsrcs, addr_size,
+                                             grec_type);
                break;
        case MCAST_EXCLUDE:
-               changed = __grp_src_toin_excl(pg, srcs, nsrcs, src_size);
+               changed = __grp_src_toin_excl(pg, h_addr, srcs, nsrcs, addr_size,
+                                             grec_type);
                break;
        }
 
+       if (br_multicast_eht_should_del_pg(pg)) {
+               pg->flags |= MDB_PG_FLAGS_FAST_LEAVE;
+               br_multicast_find_del_pg(pg->key.port->br, pg);
+               /* a notification has already been sent and we shouldn't
+                * access pg after the delete so we have to return false
+                */
+               changed = false;
+       }
+
        return changed;
 }
 
@@ -2037,8 +2075,9 @@ static bool br_multicast_toin(struct net_bridge_port_group *pg,
  *                                                       Send Q(G,A*B)
  *                                                       Group Timer=GMI
  */
-static void __grp_src_toex_incl(struct net_bridge_port_group *pg,
-                               void *srcs, u32 nsrcs, size_t src_size)
+static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr,
+                               void *srcs, u32 nsrcs, size_t addr_size,
+                               int grec_type)
 {
        struct net_bridge_group_src *ent;
        u32 src_idx, to_send = 0;
@@ -2050,7 +2089,7 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg,
        memset(&src_ip, 0, sizeof(src_ip));
        src_ip.proto = pg->key.addr.proto;
        for (src_idx = 0; src_idx < nsrcs; src_idx++) {
-               memcpy(&src_ip.src, srcs, src_size);
+               memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size);
                ent = br_multicast_find_group_src(pg, &src_ip);
                if (ent) {
                        ent->flags = (ent->flags & ~BR_SGRP_F_DELETE) |
@@ -2061,9 +2100,10 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg,
                }
                if (ent)
                        br_multicast_fwd_src_handle(ent);
-               srcs += src_size;
        }
 
+       br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type);
+
        __grp_src_delete_marked(pg);
        if (to_send)
                __grp_src_query_marked_and_rexmit(pg);
@@ -2076,8 +2116,9 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg,
  *                                                       Send Q(G,A-Y)
  *                                                       Group Timer=GMI
  */
-static bool __grp_src_toex_excl(struct net_bridge_port_group *pg,
-                               void *srcs, u32 nsrcs, size_t src_size)
+static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, void *h_addr,
+                               void *srcs, u32 nsrcs, size_t addr_size,
+                               int grec_type)
 {
        struct net_bridge_group_src *ent;
        u32 src_idx, to_send = 0;
@@ -2090,7 +2131,7 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg,
        memset(&src_ip, 0, sizeof(src_ip));
        src_ip.proto = pg->key.addr.proto;
        for (src_idx = 0; src_idx < nsrcs; src_idx++) {
-               memcpy(&src_ip.src, srcs, src_size);
+               memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size);
                ent = br_multicast_find_group_src(pg, &src_ip);
                if (ent) {
                        ent->flags &= ~BR_SGRP_F_DELETE;
@@ -2105,9 +2146,11 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg,
                        ent->flags |= BR_SGRP_F_SEND;
                        to_send++;
                }
-               srcs += src_size;
        }
 
+       if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+               changed = true;
+
        if (__grp_src_delete_marked(pg))
                changed = true;
        if (to_send)
@@ -2116,20 +2159,23 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg,
        return changed;
 }
 
-static bool br_multicast_toex(struct net_bridge_port_group *pg,
-                             void *srcs, u32 nsrcs, size_t src_size)
+static bool br_multicast_toex(struct net_bridge_port_group *pg, void *h_addr,
+                             void *srcs, u32 nsrcs, size_t addr_size,
+                             int grec_type)
 {
        struct net_bridge *br = pg->key.port->br;
        bool changed = false;
 
        switch (pg->filter_mode) {
        case MCAST_INCLUDE:
-               __grp_src_toex_incl(pg, srcs, nsrcs, src_size);
+               __grp_src_toex_incl(pg, h_addr, srcs, nsrcs, addr_size,
+                                   grec_type);
                br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE);
                changed = true;
                break;
        case MCAST_EXCLUDE:
-               changed = __grp_src_toex_excl(pg, srcs, nsrcs, src_size);
+               changed = __grp_src_toex_excl(pg, h_addr, srcs, nsrcs, addr_size,
+                                             grec_type);
                break;
        }
 
@@ -2142,11 +2188,12 @@ static bool br_multicast_toex(struct net_bridge_port_group *pg,
 /* State          Msg type      New state                Actions
  * INCLUDE (A)    BLOCK (B)     INCLUDE (A)              Send Q(G,A*B)
  */
-static void __grp_src_block_incl(struct net_bridge_port_group *pg,
-                                void *srcs, u32 nsrcs, size_t src_size)
+static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr,
+                                void *srcs, u32 nsrcs, size_t addr_size, int grec_type)
 {
        struct net_bridge_group_src *ent;
        u32 src_idx, to_send = 0;
+       bool changed = false;
        struct br_ip src_ip;
 
        hlist_for_each_entry(ent, &pg->src_list, node)
@@ -2155,28 +2202,29 @@ static void __grp_src_block_incl(struct net_bridge_port_group *pg,
        memset(&src_ip, 0, sizeof(src_ip));
        src_ip.proto = pg->key.addr.proto;
        for (src_idx = 0; src_idx < nsrcs; src_idx++) {
-               memcpy(&src_ip.src, srcs, src_size);
+               memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size);
                ent = br_multicast_find_group_src(pg, &src_ip);
                if (ent) {
                        ent->flags |= BR_SGRP_F_SEND;
                        to_send++;
                }
-               srcs += src_size;
        }
 
+       if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+               changed = true;
+
        if (to_send)
                __grp_src_query_marked_and_rexmit(pg);
 
-       if (pg->filter_mode == MCAST_INCLUDE && hlist_empty(&pg->src_list))
-               br_multicast_find_del_pg(pg->key.port->br, pg);
+       return changed;
 }
 
 /* State          Msg type      New state                Actions
  * EXCLUDE (X,Y)  BLOCK (A)     EXCLUDE (X+(A-Y),Y)      (A-X-Y)=Group Timer
  *                                                       Send Q(G,A-Y)
  */
-static bool __grp_src_block_excl(struct net_bridge_port_group *pg,
-                                void *srcs, u32 nsrcs, size_t src_size)
+static bool __grp_src_block_excl(struct net_bridge_port_group *pg, void *h_addr,
+                                void *srcs, u32 nsrcs, size_t addr_size, int grec_type)
 {
        struct net_bridge_group_src *ent;
        u32 src_idx, to_send = 0;
@@ -2189,7 +2237,7 @@ static bool __grp_src_block_excl(struct net_bridge_port_group *pg,
        memset(&src_ip, 0, sizeof(src_ip));
        src_ip.proto = pg->key.addr.proto;
        for (src_idx = 0; src_idx < nsrcs; src_idx++) {
-               memcpy(&src_ip.src, srcs, src_size);
+               memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size);
                ent = br_multicast_find_group_src(pg, &src_ip);
                if (!ent) {
                        ent = br_multicast_new_group_src(pg, &src_ip);
@@ -2202,29 +2250,44 @@ static bool __grp_src_block_excl(struct net_bridge_port_group *pg,
                        ent->flags |= BR_SGRP_F_SEND;
                        to_send++;
                }
-               srcs += src_size;
        }
 
+       if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+               changed = true;
+
        if (to_send)
                __grp_src_query_marked_and_rexmit(pg);
 
        return changed;
 }
 
-static bool br_multicast_block(struct net_bridge_port_group *pg,
-                              void *srcs, u32 nsrcs, size_t src_size)
+static bool br_multicast_block(struct net_bridge_port_group *pg, void *h_addr,
+                              void *srcs, u32 nsrcs, size_t addr_size, int grec_type)
 {
        bool changed = false;
 
        switch (pg->filter_mode) {
        case MCAST_INCLUDE:
-               __grp_src_block_incl(pg, srcs, nsrcs, src_size);
+               changed = __grp_src_block_incl(pg, h_addr, srcs, nsrcs, addr_size,
+                                              grec_type);
                break;
        case MCAST_EXCLUDE:
-               changed = __grp_src_block_excl(pg, srcs, nsrcs, src_size);
+               changed = __grp_src_block_excl(pg, h_addr, srcs, nsrcs, addr_size,
+                                              grec_type);
                break;
        }
 
+       if ((pg->filter_mode == MCAST_INCLUDE && hlist_empty(&pg->src_list)) ||
+           br_multicast_eht_should_del_pg(pg)) {
+               if (br_multicast_eht_should_del_pg(pg))
+                       pg->flags |= MDB_PG_FLAGS_FAST_LEAVE;
+               br_multicast_find_del_pg(pg->key.port->br, pg);
+               /* a notification has already been sent and we shouldn't
+                * access pg after the delete so we have to return false
+                */
+               changed = false;
+       }
+
        return changed;
 }
 
@@ -2257,8 +2320,8 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
        struct igmpv3_report *ih;
        struct igmpv3_grec *grec;
        int i, len, num, type;
+       __be32 group, *h_addr;
        bool changed = false;
-       __be32 group;
        int err = 0;
        u16 nsrcs;
 
@@ -2318,32 +2381,33 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
                pg = br_multicast_find_port(mdst, port, src);
                if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT))
                        goto unlock_continue;
-               /* reload grec */
+               /* reload grec and host addr */
                grec = (void *)(skb->data + len - sizeof(*grec) - (nsrcs * 4));
+               h_addr = &ip_hdr(skb)->saddr;
                switch (type) {
                case IGMPV3_ALLOW_NEW_SOURCES:
-                       changed = br_multicast_isinc_allow(pg, grec->grec_src,
-                                                          nsrcs, sizeof(__be32));
+                       changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src,
+                                                          nsrcs, sizeof(__be32), type);
                        break;
                case IGMPV3_MODE_IS_INCLUDE:
-                       changed = br_multicast_isinc_allow(pg, grec->grec_src, nsrcs,
-                                                          sizeof(__be32));
+                       changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src,
+                                                          nsrcs, sizeof(__be32), type);
                        break;
                case IGMPV3_MODE_IS_EXCLUDE:
-                       changed = br_multicast_isexc(pg, grec->grec_src, nsrcs,
-                                                    sizeof(__be32));
+                       changed = br_multicast_isexc(pg, h_addr, grec->grec_src,
+                                                    nsrcs, sizeof(__be32), type);
                        break;
                case IGMPV3_CHANGE_TO_INCLUDE:
-                       changed = br_multicast_toin(pg, grec->grec_src, nsrcs,
-                                                   sizeof(__be32));
+                       changed = br_multicast_toin(pg, h_addr, grec->grec_src,
+                                                   nsrcs, sizeof(__be32), type);
                        break;
                case IGMPV3_CHANGE_TO_EXCLUDE:
-                       changed = br_multicast_toex(pg, grec->grec_src, nsrcs,
-                                                   sizeof(__be32));
+                       changed = br_multicast_toex(pg, h_addr, grec->grec_src,
+                                                   nsrcs, sizeof(__be32), type);
                        break;
                case IGMPV3_BLOCK_OLD_SOURCES:
-                       changed = br_multicast_block(pg, grec->grec_src, nsrcs,
-                                                    sizeof(__be32));
+                       changed = br_multicast_block(pg, h_addr, grec->grec_src,
+                                                    nsrcs, sizeof(__be32), type);
                        break;
                }
                if (changed)
@@ -2367,6 +2431,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
        unsigned int nsrcs_offset;
        const unsigned char *src;
        struct icmp6hdr *icmp6h;
+       struct in6_addr *h_addr;
        struct mld2_grec *grec;
        unsigned int grec_len;
        bool changed = false;
@@ -2445,31 +2510,43 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
                pg = br_multicast_find_port(mdst, port, src);
                if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT))
                        goto unlock_continue;
+               h_addr = &ipv6_hdr(skb)->saddr;
                switch (grec->grec_type) {
                case MLD2_ALLOW_NEW_SOURCES:
-                       changed = br_multicast_isinc_allow(pg, grec->grec_src,
-                                                          nsrcs,
-                                                          sizeof(struct in6_addr));
+                       changed = br_multicast_isinc_allow(pg, h_addr,
+                                                          grec->grec_src, nsrcs,
+                                                          sizeof(struct in6_addr),
+                                                          grec->grec_type);
                        break;
                case MLD2_MODE_IS_INCLUDE:
-                       changed = br_multicast_isinc_allow(pg, grec->grec_src, nsrcs,
-                                                          sizeof(struct in6_addr));
+                       changed = br_multicast_isinc_allow(pg, h_addr,
+                                                          grec->grec_src, nsrcs,
+                                                          sizeof(struct in6_addr),
+                                                          grec->grec_type);
                        break;
                case MLD2_MODE_IS_EXCLUDE:
-                       changed = br_multicast_isexc(pg, grec->grec_src, nsrcs,
-                                                    sizeof(struct in6_addr));
+                       changed = br_multicast_isexc(pg, h_addr,
+                                                    grec->grec_src, nsrcs,
+                                                    sizeof(struct in6_addr),
+                                                    grec->grec_type);
                        break;
                case MLD2_CHANGE_TO_INCLUDE:
-                       changed = br_multicast_toin(pg, grec->grec_src, nsrcs,
-                                                   sizeof(struct in6_addr));
+                       changed = br_multicast_toin(pg, h_addr,
+                                                   grec->grec_src, nsrcs,
+                                                   sizeof(struct in6_addr),
+                                                   grec->grec_type);
                        break;
                case MLD2_CHANGE_TO_EXCLUDE:
-                       changed = br_multicast_toex(pg, grec->grec_src, nsrcs,
-                                                   sizeof(struct in6_addr));
+                       changed = br_multicast_toex(pg, h_addr,
+                                                   grec->grec_src, nsrcs,
+                                                   sizeof(struct in6_addr),
+                                                   grec->grec_type);
                        break;
                case MLD2_BLOCK_OLD_SOURCES:
-                       changed = br_multicast_block(pg, grec->grec_src, nsrcs,
-                                                    sizeof(struct in6_addr));
+                       changed = br_multicast_block(pg, h_addr,
+                                                    grec->grec_src, nsrcs,
+                                                    sizeof(struct in6_addr),
+                                                    grec->grec_type);
                        break;
                }
                if (changed)
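
Editor's note on the conversion above: every IGMPv3/MLDv2 record handler now receives two extra pieces of context, the reporting host's address (h_addr, taken from the IPv4/IPv6 header) and the group record type, and the per-source loops index into the packed source array with srcs + (src_idx * addr_size) instead of advancing the srcs pointer. That leaves srcs pointing at the start of the record so the whole array can be handed to br_multicast_eht_handle() after the walk. Below is a minimal userspace sketch of that indexing pattern only; walk_sources and the sample record are hypothetical stand-ins for the skb-resident group record, not kernel code.

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <arpa/inet.h>

    /* Walk a packed array of IPv4 sources the way the converted loops do:
     * index from the unchanged base pointer instead of advancing it, so the
     * same pointer can still be passed on to a second consumer afterwards.
     */
    static void walk_sources(const void *srcs, uint32_t nsrcs, size_t addr_size)
    {
            uint32_t src_idx;

            for (src_idx = 0; src_idx < nsrcs; src_idx++) {
                    struct in_addr src;

                    /* mirrors: memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); */
                    memcpy(&src, (const char *)srcs + (src_idx * addr_size), addr_size);
                    printf("source %u: %s\n", src_idx, inet_ntoa(src));
            }
            /* 'srcs' still points at the record start here, ready for the EHT pass */
    }

    int main(void)
    {
            /* three packed IPv4 sources, as they would appear in a group record */
            uint8_t record[] = { 10, 0, 0, 1, 10, 0, 0, 2, 10, 0, 0, 3 };

            walk_sources(record, 3, sizeof(uint32_t));
            return 0;
    }

The same loop shape works for MLDv2 by passing sizeof(struct in6_addr) as addr_size, which is exactly how the IPv6 report handler calls into the shared helpers.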
diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c
new file mode 100644 (file)
index 0000000..fea38b9
--- /dev/null
@@ -0,0 +1,878 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2020, Nikolay Aleksandrov <nikolay@nvidia.com>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/if_ether.h>
+#include <linux/igmp.h>
+#include <linux/in.h>
+#include <linux/jhash.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/random.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+#include <linux/inetdevice.h>
+#include <linux/mroute.h>
+#include <net/ip.h>
+#include <net/switchdev.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <linux/icmpv6.h>
+#include <net/ipv6.h>
+#include <net/mld.h>
+#include <net/ip6_checksum.h>
+#include <net/addrconf.h>
+#endif
+
+#include "br_private.h"
+#include "br_private_mcast_eht.h"
+
+static bool br_multicast_del_eht_set_entry(struct net_bridge_port_group *pg,
+                                          union net_bridge_eht_addr *src_addr,
+                                          union net_bridge_eht_addr *h_addr);
+static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg,
+                                             union net_bridge_eht_addr *src_addr,
+                                             union net_bridge_eht_addr *h_addr,
+                                             int filter_mode,
+                                             bool allow_zero_src);
+
+static struct net_bridge_group_eht_host *
+br_multicast_eht_host_lookup(struct net_bridge_port_group *pg,
+                            union net_bridge_eht_addr *h_addr)
+{
+       struct rb_node *node = pg->eht_host_tree.rb_node;
+
+       while (node) {
+               struct net_bridge_group_eht_host *this;
+               int result;
+
+               this = rb_entry(node, struct net_bridge_group_eht_host,
+                               rb_node);
+               result = memcmp(h_addr, &this->h_addr, sizeof(*h_addr));
+               if (result < 0)
+                       node = node->rb_left;
+               else if (result > 0)
+                       node = node->rb_right;
+               else
+                       return this;
+       }
+
+       return NULL;
+}
+
+static int br_multicast_eht_host_filter_mode(struct net_bridge_port_group *pg,
+                                            union net_bridge_eht_addr *h_addr)
+{
+       struct net_bridge_group_eht_host *eht_host;
+
+       eht_host = br_multicast_eht_host_lookup(pg, h_addr);
+       if (!eht_host)
+               return MCAST_INCLUDE;
+
+       return eht_host->filter_mode;
+}
+
+static struct net_bridge_group_eht_set_entry *
+br_multicast_eht_set_entry_lookup(struct net_bridge_group_eht_set *eht_set,
+                                 union net_bridge_eht_addr *h_addr)
+{
+       struct rb_node *node = eht_set->entry_tree.rb_node;
+
+       while (node) {
+               struct net_bridge_group_eht_set_entry *this;
+               int result;
+
+               this = rb_entry(node, struct net_bridge_group_eht_set_entry,
+                               rb_node);
+               result = memcmp(h_addr, &this->h_addr, sizeof(*h_addr));
+               if (result < 0)
+                       node = node->rb_left;
+               else if (result > 0)
+                       node = node->rb_right;
+               else
+                       return this;
+       }
+
+       return NULL;
+}
+
+static struct net_bridge_group_eht_set *
+br_multicast_eht_set_lookup(struct net_bridge_port_group *pg,
+                           union net_bridge_eht_addr *src_addr)
+{
+       struct rb_node *node = pg->eht_set_tree.rb_node;
+
+       while (node) {
+               struct net_bridge_group_eht_set *this;
+               int result;
+
+               this = rb_entry(node, struct net_bridge_group_eht_set,
+                               rb_node);
+               result = memcmp(src_addr, &this->src_addr, sizeof(*src_addr));
+               if (result < 0)
+                       node = node->rb_left;
+               else if (result > 0)
+                       node = node->rb_right;
+               else
+                       return this;
+       }
+
+       return NULL;
+}
+
+static void __eht_destroy_host(struct net_bridge_group_eht_host *eht_host)
+{
+       WARN_ON(!hlist_empty(&eht_host->set_entries));
+
+       br_multicast_eht_hosts_dec(eht_host->pg);
+
+       rb_erase(&eht_host->rb_node, &eht_host->pg->eht_host_tree);
+       RB_CLEAR_NODE(&eht_host->rb_node);
+       kfree(eht_host);
+}
+
+static void br_multicast_destroy_eht_set_entry(struct net_bridge_mcast_gc *gc)
+{
+       struct net_bridge_group_eht_set_entry *set_h;
+
+       set_h = container_of(gc, struct net_bridge_group_eht_set_entry, mcast_gc);
+       WARN_ON(!RB_EMPTY_NODE(&set_h->rb_node));
+
+       del_timer_sync(&set_h->timer);
+       kfree(set_h);
+}
+
+static void br_multicast_destroy_eht_set(struct net_bridge_mcast_gc *gc)
+{
+       struct net_bridge_group_eht_set *eht_set;
+
+       eht_set = container_of(gc, struct net_bridge_group_eht_set, mcast_gc);
+       WARN_ON(!RB_EMPTY_NODE(&eht_set->rb_node));
+       WARN_ON(!RB_EMPTY_ROOT(&eht_set->entry_tree));
+
+       del_timer_sync(&eht_set->timer);
+       kfree(eht_set);
+}
+
+static void __eht_del_set_entry(struct net_bridge_group_eht_set_entry *set_h)
+{
+       struct net_bridge_group_eht_host *eht_host = set_h->h_parent;
+       union net_bridge_eht_addr zero_addr;
+
+       rb_erase(&set_h->rb_node, &set_h->eht_set->entry_tree);
+       RB_CLEAR_NODE(&set_h->rb_node);
+       hlist_del_init(&set_h->host_list);
+       memset(&zero_addr, 0, sizeof(zero_addr));
+       if (memcmp(&set_h->h_addr, &zero_addr, sizeof(zero_addr)))
+               eht_host->num_entries--;
+       hlist_add_head(&set_h->mcast_gc.gc_node, &set_h->br->mcast_gc_list);
+       queue_work(system_long_wq, &set_h->br->mcast_gc_work);
+
+       if (hlist_empty(&eht_host->set_entries))
+               __eht_destroy_host(eht_host);
+}
+
+static void br_multicast_del_eht_set(struct net_bridge_group_eht_set *eht_set)
+{
+       struct net_bridge_group_eht_set_entry *set_h;
+       struct rb_node *node;
+
+       while ((node = rb_first(&eht_set->entry_tree))) {
+               set_h = rb_entry(node, struct net_bridge_group_eht_set_entry,
+                                rb_node);
+               __eht_del_set_entry(set_h);
+       }
+
+       rb_erase(&eht_set->rb_node, &eht_set->pg->eht_set_tree);
+       RB_CLEAR_NODE(&eht_set->rb_node);
+       hlist_add_head(&eht_set->mcast_gc.gc_node, &eht_set->br->mcast_gc_list);
+       queue_work(system_long_wq, &eht_set->br->mcast_gc_work);
+}
+
+void br_multicast_eht_clean_sets(struct net_bridge_port_group *pg)
+{
+       struct net_bridge_group_eht_set *eht_set;
+       struct rb_node *node;
+
+       while ((node = rb_first(&pg->eht_set_tree))) {
+               eht_set = rb_entry(node, struct net_bridge_group_eht_set,
+                                  rb_node);
+               br_multicast_del_eht_set(eht_set);
+       }
+}
+
+static void br_multicast_eht_set_entry_expired(struct timer_list *t)
+{
+       struct net_bridge_group_eht_set_entry *set_h = from_timer(set_h, t, timer);
+       struct net_bridge *br = set_h->br;
+
+       spin_lock(&br->multicast_lock);
+       if (RB_EMPTY_NODE(&set_h->rb_node) || timer_pending(&set_h->timer))
+               goto out;
+
+       br_multicast_del_eht_set_entry(set_h->eht_set->pg,
+                                      &set_h->eht_set->src_addr,
+                                      &set_h->h_addr);
+out:
+       spin_unlock(&br->multicast_lock);
+}
+
+static void br_multicast_eht_set_expired(struct timer_list *t)
+{
+       struct net_bridge_group_eht_set *eht_set = from_timer(eht_set, t,
+                                                             timer);
+       struct net_bridge *br = eht_set->br;
+
+       spin_lock(&br->multicast_lock);
+       if (RB_EMPTY_NODE(&eht_set->rb_node) || timer_pending(&eht_set->timer))
+               goto out;
+
+       br_multicast_del_eht_set(eht_set);
+out:
+       spin_unlock(&br->multicast_lock);
+}
+
+static struct net_bridge_group_eht_host *
+__eht_lookup_create_host(struct net_bridge_port_group *pg,
+                        union net_bridge_eht_addr *h_addr,
+                        unsigned char filter_mode)
+{
+       struct rb_node **link = &pg->eht_host_tree.rb_node, *parent = NULL;
+       struct net_bridge_group_eht_host *eht_host;
+
+       while (*link) {
+               struct net_bridge_group_eht_host *this;
+               int result;
+
+               this = rb_entry(*link, struct net_bridge_group_eht_host,
+                               rb_node);
+               result = memcmp(h_addr, &this->h_addr, sizeof(*h_addr));
+               parent = *link;
+               if (result < 0)
+                       link = &((*link)->rb_left);
+               else if (result > 0)
+                       link = &((*link)->rb_right);
+               else
+                       return this;
+       }
+
+       if (br_multicast_eht_hosts_over_limit(pg))
+               return NULL;
+
+       eht_host = kzalloc(sizeof(*eht_host), GFP_ATOMIC);
+       if (!eht_host)
+               return NULL;
+
+       memcpy(&eht_host->h_addr, h_addr, sizeof(*h_addr));
+       INIT_HLIST_HEAD(&eht_host->set_entries);
+       eht_host->pg = pg;
+       eht_host->filter_mode = filter_mode;
+
+       rb_link_node(&eht_host->rb_node, parent, link);
+       rb_insert_color(&eht_host->rb_node, &pg->eht_host_tree);
+
+       br_multicast_eht_hosts_inc(pg);
+
+       return eht_host;
+}
+
+static struct net_bridge_group_eht_set_entry *
+__eht_lookup_create_set_entry(struct net_bridge *br,
+                             struct net_bridge_group_eht_set *eht_set,
+                             struct net_bridge_group_eht_host *eht_host,
+                             bool allow_zero_src)
+{
+       struct rb_node **link = &eht_set->entry_tree.rb_node, *parent = NULL;
+       struct net_bridge_group_eht_set_entry *set_h;
+
+       while (*link) {
+               struct net_bridge_group_eht_set_entry *this;
+               int result;
+
+               this = rb_entry(*link, struct net_bridge_group_eht_set_entry,
+                               rb_node);
+               result = memcmp(&eht_host->h_addr, &this->h_addr,
+                               sizeof(union net_bridge_eht_addr));
+               parent = *link;
+               if (result < 0)
+                       link = &((*link)->rb_left);
+               else if (result > 0)
+                       link = &((*link)->rb_right);
+               else
+                       return this;
+       }
+
+       /* always allow auto-created zero entry */
+       if (!allow_zero_src && eht_host->num_entries >= PG_SRC_ENT_LIMIT)
+               return NULL;
+
+       set_h = kzalloc(sizeof(*set_h), GFP_ATOMIC);
+       if (!set_h)
+               return NULL;
+
+       memcpy(&set_h->h_addr, &eht_host->h_addr,
+              sizeof(union net_bridge_eht_addr));
+       set_h->mcast_gc.destroy = br_multicast_destroy_eht_set_entry;
+       set_h->eht_set = eht_set;
+       set_h->h_parent = eht_host;
+       set_h->br = br;
+       timer_setup(&set_h->timer, br_multicast_eht_set_entry_expired, 0);
+
+       hlist_add_head(&set_h->host_list, &eht_host->set_entries);
+       rb_link_node(&set_h->rb_node, parent, link);
+       rb_insert_color(&set_h->rb_node, &eht_set->entry_tree);
+       /* we must not count the auto-created zero entry otherwise we won't be
+        * able to track the full list of PG_SRC_ENT_LIMIT entries
+        */
+       if (!allow_zero_src)
+               eht_host->num_entries++;
+
+       return set_h;
+}
+
+static struct net_bridge_group_eht_set *
+__eht_lookup_create_set(struct net_bridge_port_group *pg,
+                       union net_bridge_eht_addr *src_addr)
+{
+       struct rb_node **link = &pg->eht_set_tree.rb_node, *parent = NULL;
+       struct net_bridge_group_eht_set *eht_set;
+
+       while (*link) {
+               struct net_bridge_group_eht_set *this;
+               int result;
+
+               this = rb_entry(*link, struct net_bridge_group_eht_set,
+                               rb_node);
+               result = memcmp(src_addr, &this->src_addr, sizeof(*src_addr));
+               parent = *link;
+               if (result < 0)
+                       link = &((*link)->rb_left);
+               else if (result > 0)
+                       link = &((*link)->rb_right);
+               else
+                       return this;
+       }
+
+       eht_set = kzalloc(sizeof(*eht_set), GFP_ATOMIC);
+       if (!eht_set)
+               return NULL;
+
+       memcpy(&eht_set->src_addr, src_addr, sizeof(*src_addr));
+       eht_set->mcast_gc.destroy = br_multicast_destroy_eht_set;
+       eht_set->pg = pg;
+       eht_set->br = pg->key.port->br;
+       eht_set->entry_tree = RB_ROOT;
+       timer_setup(&eht_set->timer, br_multicast_eht_set_expired, 0);
+
+       rb_link_node(&eht_set->rb_node, parent, link);
+       rb_insert_color(&eht_set->rb_node, &pg->eht_set_tree);
+
+       return eht_set;
+}
+
+static void br_multicast_ip_src_to_eht_addr(const struct br_ip *src,
+                                           union net_bridge_eht_addr *dest)
+{
+       switch (src->proto) {
+       case htons(ETH_P_IP):
+               dest->ip4 = src->src.ip4;
+               break;
+#if IS_ENABLED(CONFIG_IPV6)
+       case htons(ETH_P_IPV6):
+               memcpy(&dest->ip6, &src->src.ip6, sizeof(struct in6_addr));
+               break;
+#endif
+       }
+}
+
+static void br_eht_convert_host_filter_mode(struct net_bridge_port_group *pg,
+                                           union net_bridge_eht_addr *h_addr,
+                                           int filter_mode)
+{
+       struct net_bridge_group_eht_host *eht_host;
+       union net_bridge_eht_addr zero_addr;
+
+       eht_host = br_multicast_eht_host_lookup(pg, h_addr);
+       if (eht_host)
+               eht_host->filter_mode = filter_mode;
+
+       memset(&zero_addr, 0, sizeof(zero_addr));
+       switch (filter_mode) {
+       case MCAST_INCLUDE:
+               br_multicast_del_eht_set_entry(pg, &zero_addr, h_addr);
+               break;
+       case MCAST_EXCLUDE:
+               br_multicast_create_eht_set_entry(pg, &zero_addr, h_addr,
+                                                 MCAST_EXCLUDE,
+                                                 true);
+               break;
+       }
+}
+
+static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg,
+                                             union net_bridge_eht_addr *src_addr,
+                                             union net_bridge_eht_addr *h_addr,
+                                             int filter_mode,
+                                             bool allow_zero_src)
+{
+       struct net_bridge_group_eht_set_entry *set_h;
+       struct net_bridge_group_eht_host *eht_host;
+       struct net_bridge *br = pg->key.port->br;
+       struct net_bridge_group_eht_set *eht_set;
+       union net_bridge_eht_addr zero_addr;
+
+       memset(&zero_addr, 0, sizeof(zero_addr));
+       if (!allow_zero_src && !memcmp(src_addr, &zero_addr, sizeof(zero_addr)))
+               return;
+
+       eht_set = __eht_lookup_create_set(pg, src_addr);
+       if (!eht_set)
+               return;
+
+       eht_host = __eht_lookup_create_host(pg, h_addr, filter_mode);
+       if (!eht_host)
+               goto fail_host;
+
+       set_h = __eht_lookup_create_set_entry(br, eht_set, eht_host,
+                                             allow_zero_src);
+       if (!set_h)
+               goto fail_set_entry;
+
+       mod_timer(&set_h->timer, jiffies + br_multicast_gmi(br));
+       mod_timer(&eht_set->timer, jiffies + br_multicast_gmi(br));
+
+       return;
+
+fail_set_entry:
+       if (hlist_empty(&eht_host->set_entries))
+               __eht_destroy_host(eht_host);
+fail_host:
+       if (RB_EMPTY_ROOT(&eht_set->entry_tree))
+               br_multicast_del_eht_set(eht_set);
+}
+
+static bool br_multicast_del_eht_set_entry(struct net_bridge_port_group *pg,
+                                          union net_bridge_eht_addr *src_addr,
+                                          union net_bridge_eht_addr *h_addr)
+{
+       struct net_bridge_group_eht_set_entry *set_h;
+       struct net_bridge_group_eht_set *eht_set;
+       bool set_deleted = false;
+
+       eht_set = br_multicast_eht_set_lookup(pg, src_addr);
+       if (!eht_set)
+               goto out;
+
+       set_h = br_multicast_eht_set_entry_lookup(eht_set, h_addr);
+       if (!set_h)
+               goto out;
+
+       __eht_del_set_entry(set_h);
+
+       if (RB_EMPTY_ROOT(&eht_set->entry_tree)) {
+               br_multicast_del_eht_set(eht_set);
+               set_deleted = true;
+       }
+
+out:
+       return set_deleted;
+}
+
+static void br_multicast_del_eht_host(struct net_bridge_port_group *pg,
+                                     union net_bridge_eht_addr *h_addr)
+{
+       struct net_bridge_group_eht_set_entry *set_h;
+       struct net_bridge_group_eht_host *eht_host;
+       struct hlist_node *tmp;
+
+       eht_host = br_multicast_eht_host_lookup(pg, h_addr);
+       if (!eht_host)
+               return;
+
+       hlist_for_each_entry_safe(set_h, tmp, &eht_host->set_entries, host_list)
+               br_multicast_del_eht_set_entry(set_h->eht_set->pg,
+                                              &set_h->eht_set->src_addr,
+                                              &set_h->h_addr);
+}
+
+static void __eht_allow_incl(struct net_bridge_port_group *pg,
+                            union net_bridge_eht_addr *h_addr,
+                            void *srcs,
+                            u32 nsrcs,
+                            size_t addr_size)
+{
+       union net_bridge_eht_addr eht_src_addr;
+       u32 src_idx;
+
+       memset(&eht_src_addr, 0, sizeof(eht_src_addr));
+       for (src_idx = 0; src_idx < nsrcs; src_idx++) {
+               memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
+               br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr,
+                                                 MCAST_INCLUDE,
+                                                 false);
+       }
+}
+
+static bool __eht_allow_excl(struct net_bridge_port_group *pg,
+                            union net_bridge_eht_addr *h_addr,
+                            void *srcs,
+                            u32 nsrcs,
+                            size_t addr_size)
+{
+       bool changed = false, host_excl = false;
+       union net_bridge_eht_addr eht_src_addr;
+       struct net_bridge_group_src *src_ent;
+       struct br_ip src_ip;
+       u32 src_idx;
+
+       host_excl = !!(br_multicast_eht_host_filter_mode(pg, h_addr) == MCAST_EXCLUDE);
+       memset(&eht_src_addr, 0, sizeof(eht_src_addr));
+       for (src_idx = 0; src_idx < nsrcs; src_idx++) {
+               memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
+               if (!host_excl) {
+                       br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr,
+                                                         MCAST_INCLUDE,
+                                                         false);
+               } else {
+                       if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr,
+                                                           h_addr))
+                               continue;
+                       memcpy(&src_ip, srcs + (src_idx * addr_size), addr_size);
+                       src_ent = br_multicast_find_group_src(pg, &src_ip);
+                       if (!src_ent)
+                               continue;
+                       br_multicast_del_group_src(src_ent, true);
+                       changed = true;
+               }
+       }
+
+       return changed;
+}
+
+static bool br_multicast_eht_allow(struct net_bridge_port_group *pg,
+                                  union net_bridge_eht_addr *h_addr,
+                                  void *srcs,
+                                  u32 nsrcs,
+                                  size_t addr_size)
+{
+       bool changed = false;
+
+       switch (br_multicast_eht_host_filter_mode(pg, h_addr)) {
+       case MCAST_INCLUDE:
+               __eht_allow_incl(pg, h_addr, srcs, nsrcs, addr_size);
+               break;
+       case MCAST_EXCLUDE:
+               changed = __eht_allow_excl(pg, h_addr, srcs, nsrcs, addr_size);
+               break;
+       }
+
+       return changed;
+}
+
+static bool __eht_block_incl(struct net_bridge_port_group *pg,
+                            union net_bridge_eht_addr *h_addr,
+                            void *srcs,
+                            u32 nsrcs,
+                            size_t addr_size)
+{
+       union net_bridge_eht_addr eht_src_addr;
+       struct net_bridge_group_src *src_ent;
+       bool changed = false;
+       struct br_ip src_ip;
+       u32 src_idx;
+
+       memset(&eht_src_addr, 0, sizeof(eht_src_addr));
+       memset(&src_ip, 0, sizeof(src_ip));
+       src_ip.proto = pg->key.addr.proto;
+       for (src_idx = 0; src_idx < nsrcs; src_idx++) {
+               memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
+               if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr, h_addr))
+                       continue;
+               memcpy(&src_ip, srcs + (src_idx * addr_size), addr_size);
+               src_ent = br_multicast_find_group_src(pg, &src_ip);
+               if (!src_ent)
+                       continue;
+               br_multicast_del_group_src(src_ent, true);
+               changed = true;
+       }
+
+       return changed;
+}
+
+static bool __eht_block_excl(struct net_bridge_port_group *pg,
+                            union net_bridge_eht_addr *h_addr,
+                            void *srcs,
+                            u32 nsrcs,
+                            size_t addr_size)
+{
+       bool changed = false, host_excl = false;
+       union net_bridge_eht_addr eht_src_addr;
+       struct net_bridge_group_src *src_ent;
+       struct br_ip src_ip;
+       u32 src_idx;
+
+       host_excl = !!(br_multicast_eht_host_filter_mode(pg, h_addr) == MCAST_EXCLUDE);
+       memset(&eht_src_addr, 0, sizeof(eht_src_addr));
+       memset(&src_ip, 0, sizeof(src_ip));
+       src_ip.proto = pg->key.addr.proto;
+       for (src_idx = 0; src_idx < nsrcs; src_idx++) {
+               memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
+               if (host_excl) {
+                       br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr,
+                                                         MCAST_EXCLUDE,
+                                                         false);
+               } else {
+                       if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr,
+                                                           h_addr))
+                               continue;
+                       memcpy(&src_ip, srcs + (src_idx * addr_size), addr_size);
+                       src_ent = br_multicast_find_group_src(pg, &src_ip);
+                       if (!src_ent)
+                               continue;
+                       br_multicast_del_group_src(src_ent, true);
+                       changed = true;
+               }
+       }
+
+       return changed;
+}
+
+static bool br_multicast_eht_block(struct net_bridge_port_group *pg,
+                                  union net_bridge_eht_addr *h_addr,
+                                  void *srcs,
+                                  u32 nsrcs,
+                                  size_t addr_size)
+{
+       bool changed = false;
+
+       switch (br_multicast_eht_host_filter_mode(pg, h_addr)) {
+       case MCAST_INCLUDE:
+               changed = __eht_block_incl(pg, h_addr, srcs, nsrcs, addr_size);
+               break;
+       case MCAST_EXCLUDE:
+               changed = __eht_block_excl(pg, h_addr, srcs, nsrcs, addr_size);
+               break;
+       }
+
+       return changed;
+}
+
+/* flush_entries is true when changing mode */
+static bool __eht_inc_exc(struct net_bridge_port_group *pg,
+                         union net_bridge_eht_addr *h_addr,
+                         void *srcs,
+                         u32 nsrcs,
+                         size_t addr_size,
+                         unsigned char filter_mode,
+                         bool to_report)
+{
+       bool changed = false, flush_entries = to_report;
+       union net_bridge_eht_addr eht_src_addr;
+       u32 src_idx;
+
+       if (br_multicast_eht_host_filter_mode(pg, h_addr) != filter_mode)
+               flush_entries = true;
+
+       memset(&eht_src_addr, 0, sizeof(eht_src_addr));
+       /* if we're changing mode del host and its entries */
+       if (flush_entries)
+               br_multicast_del_eht_host(pg, h_addr);
+       for (src_idx = 0; src_idx < nsrcs; src_idx++) {
+               memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
+               br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr,
+                                                 filter_mode, false);
+       }
+       /* we can be missing sets only if we've deleted some entries */
+       if (flush_entries) {
+               struct net_bridge *br = pg->key.port->br;
+               struct net_bridge_group_eht_set *eht_set;
+               struct net_bridge_group_src *src_ent;
+               struct hlist_node *tmp;
+
+               hlist_for_each_entry_safe(src_ent, tmp, &pg->src_list, node) {
+                       br_multicast_ip_src_to_eht_addr(&src_ent->addr,
+                                                       &eht_src_addr);
+                       if (!br_multicast_eht_set_lookup(pg, &eht_src_addr)) {
+                               br_multicast_del_group_src(src_ent, true);
+                               changed = true;
+                               continue;
+                       }
+                       /* this is an optimization for TO_INCLUDE where we lower
+                        * the set's timeout to LMQT to catch timeout hosts:
+                        * - host A (timing out): set entries X, Y
+                        * - host B: set entry Z (new from current TO_INCLUDE)
+                        *           sends BLOCK Z after LMQT but host A's EHT
+                        *           entries still exist (unless lowered to LMQT
+                        *           so they can timeout with the S,Gs)
+                        * => we wait another LMQT, when we can just delete the
+                        *    group immediately
+                        */
+                       if (!(src_ent->flags & BR_SGRP_F_SEND) ||
+                           filter_mode != MCAST_INCLUDE ||
+                           !to_report)
+                               continue;
+                       eht_set = br_multicast_eht_set_lookup(pg,
+                                                             &eht_src_addr);
+                       if (!eht_set)
+                               continue;
+                       mod_timer(&eht_set->timer, jiffies + br_multicast_lmqt(br));
+               }
+       }
+
+       return changed;
+}
+
+static bool br_multicast_eht_inc(struct net_bridge_port_group *pg,
+                                union net_bridge_eht_addr *h_addr,
+                                void *srcs,
+                                u32 nsrcs,
+                                size_t addr_size,
+                                bool to_report)
+{
+       bool changed;
+
+       changed = __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size,
+                               MCAST_INCLUDE, to_report);
+       br_eht_convert_host_filter_mode(pg, h_addr, MCAST_INCLUDE);
+
+       return changed;
+}
+
+static bool br_multicast_eht_exc(struct net_bridge_port_group *pg,
+                                union net_bridge_eht_addr *h_addr,
+                                void *srcs,
+                                u32 nsrcs,
+                                size_t addr_size,
+                                bool to_report)
+{
+       bool changed;
+
+       changed = __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size,
+                               MCAST_EXCLUDE, to_report);
+       br_eht_convert_host_filter_mode(pg, h_addr, MCAST_EXCLUDE);
+
+       return changed;
+}
+
+static bool __eht_ip4_handle(struct net_bridge_port_group *pg,
+                            union net_bridge_eht_addr *h_addr,
+                            void *srcs,
+                            u32 nsrcs,
+                            int grec_type)
+{
+       bool changed = false, to_report = false;
+
+       switch (grec_type) {
+       case IGMPV3_ALLOW_NEW_SOURCES:
+               br_multicast_eht_allow(pg, h_addr, srcs, nsrcs, sizeof(__be32));
+               break;
+       case IGMPV3_BLOCK_OLD_SOURCES:
+               changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs,
+                                                sizeof(__be32));
+               break;
+       case IGMPV3_CHANGE_TO_INCLUDE:
+               to_report = true;
+               fallthrough;
+       case IGMPV3_MODE_IS_INCLUDE:
+               changed = br_multicast_eht_inc(pg, h_addr, srcs, nsrcs,
+                                              sizeof(__be32), to_report);
+               break;
+       case IGMPV3_CHANGE_TO_EXCLUDE:
+               to_report = true;
+               fallthrough;
+       case IGMPV3_MODE_IS_EXCLUDE:
+               changed = br_multicast_eht_exc(pg, h_addr, srcs, nsrcs,
+                                              sizeof(__be32), to_report);
+               break;
+       }
+
+       return changed;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static bool __eht_ip6_handle(struct net_bridge_port_group *pg,
+                            union net_bridge_eht_addr *h_addr,
+                            void *srcs,
+                            u32 nsrcs,
+                            int grec_type)
+{
+       bool changed = false, to_report = false;
+
+       switch (grec_type) {
+       case MLD2_ALLOW_NEW_SOURCES:
+               br_multicast_eht_allow(pg, h_addr, srcs, nsrcs,
+                                      sizeof(struct in6_addr));
+               break;
+       case MLD2_BLOCK_OLD_SOURCES:
+               changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs,
+                                                sizeof(struct in6_addr));
+               break;
+       case MLD2_CHANGE_TO_INCLUDE:
+               to_report = true;
+               fallthrough;
+       case MLD2_MODE_IS_INCLUDE:
+               changed = br_multicast_eht_inc(pg, h_addr, srcs, nsrcs,
+                                              sizeof(struct in6_addr),
+                                              to_report);
+               break;
+       case MLD2_CHANGE_TO_EXCLUDE:
+               to_report = true;
+               fallthrough;
+       case MLD2_MODE_IS_EXCLUDE:
+               changed = br_multicast_eht_exc(pg, h_addr, srcs, nsrcs,
+                                              sizeof(struct in6_addr),
+                                              to_report);
+               break;
+       }
+
+       return changed;
+}
+#endif
+
+/* true means an entry was deleted */
+bool br_multicast_eht_handle(struct net_bridge_port_group *pg,
+                            void *h_addr,
+                            void *srcs,
+                            u32 nsrcs,
+                            size_t addr_size,
+                            int grec_type)
+{
+       bool eht_enabled = !!(pg->key.port->flags & BR_MULTICAST_FAST_LEAVE);
+       union net_bridge_eht_addr eht_host_addr;
+       bool changed = false;
+
+       if (!eht_enabled)
+               goto out;
+
+       memset(&eht_host_addr, 0, sizeof(eht_host_addr));
+       memcpy(&eht_host_addr, h_addr, addr_size);
+       if (addr_size == sizeof(__be32))
+               changed = __eht_ip4_handle(pg, &eht_host_addr, srcs, nsrcs,
+                                          grec_type);
+#if IS_ENABLED(CONFIG_IPV6)
+       else
+               changed = __eht_ip6_handle(pg, &eht_host_addr, srcs, nsrcs,
+                                          grec_type);
+#endif
+
+out:
+       return changed;
+}
+
+int br_multicast_eht_set_hosts_limit(struct net_bridge_port *p,
+                                    u32 eht_hosts_limit)
+{
+       struct net_bridge *br = p->br;
+
+       if (!eht_hosts_limit)
+               return -EINVAL;
+
+       spin_lock_bh(&br->multicast_lock);
+       p->multicast_eht_hosts_limit = eht_hosts_limit;
+       spin_unlock_bh(&br->multicast_lock);
+
+       return 0;
+}
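
Editor's note on the keying used throughout the new file: br_multicast_eht_handle() zeroes a full union net_bridge_eht_addr and then copies only addr_size bytes of the reporting host's address into it, and every rb-tree lookup and insert compares the whole union with memcmp(). The zero fill is what makes that comparison stable for IPv4 keys, which occupy only the first four bytes of the union. A small self-contained sketch of that zero-then-copy keying follows; host_key and make_key are hypothetical names used only for illustration.

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <netinet/in.h>

    /* Same shape as union net_bridge_eht_addr: large enough for either family */
    union host_key {
            uint32_t        ip4;
            struct in6_addr ip6;
    };

    /* Build a comparable key from addr_size bytes of address, the way
     * br_multicast_eht_handle() does before any tree operation: zero the
     * whole union first so memcmp() over sizeof(union) is deterministic
     * even when the address fills only part of it.
     */
    static void make_key(union host_key *key, const void *addr, size_t addr_size)
    {
            memset(key, 0, sizeof(*key));
            memcpy(key, addr, addr_size);
    }

    int main(void)
    {
            uint32_t h1 = 0x0a000001, h2 = 0x0a000001;
            union host_key k1, k2;

            make_key(&k1, &h1, sizeof(h1));
            make_key(&k2, &h2, sizeof(h2));

            /* identical IPv4 reporters compare equal over the full union */
            printf("equal: %d\n", memcmp(&k1, &k2, sizeof(k1)) == 0);
            return 0;
    }

The all-zeroes key doubles as the auto-created catch-all source entry for hosts in EXCLUDE mode, which is why the creation and deletion helpers treat the zero address specially.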
index 762f273..bd3962d 100644 (file)
@@ -18,6 +18,7 @@
 #include "br_private_stp.h"
 #include "br_private_cfm.h"
 #include "br_private_tunnel.h"
+#include "br_private_mcast_eht.h"
 
 static int __get_num_vlan_infos(struct net_bridge_vlan_group *vg,
                                u32 filter_mask)
@@ -199,6 +200,8 @@ static inline size_t br_port_info_size(void)
                + nla_total_size(sizeof(u16))   /* IFLA_BRPORT_GROUP_FWD_MASK */
                + nla_total_size(sizeof(u8))    /* IFLA_BRPORT_MRP_RING_OPEN */
                + nla_total_size(sizeof(u8))    /* IFLA_BRPORT_MRP_IN_OPEN */
+               + nla_total_size(sizeof(u32))   /* IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT */
+               + nla_total_size(sizeof(u32))   /* IFLA_BRPORT_MCAST_EHT_HOSTS_CNT */
                + 0;
 }
 
@@ -283,7 +286,11 @@ static int br_port_fill_attrs(struct sk_buff *skb,
 
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
        if (nla_put_u8(skb, IFLA_BRPORT_MULTICAST_ROUTER,
-                      p->multicast_router))
+                      p->multicast_router) ||
+           nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT,
+                       p->multicast_eht_hosts_limit) ||
+           nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
+                       p->multicast_eht_hosts_cnt))
                return -EMSGSIZE;
 #endif
 
@@ -820,6 +827,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
        [IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NLA_U8 },
        [IFLA_BRPORT_ISOLATED]  = { .type = NLA_U8 },
        [IFLA_BRPORT_BACKUP_PORT] = { .type = NLA_U32 },
+       [IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT] = { .type = NLA_U32 },
 };
 
 /* Change the state of the port and notify spanning tree */
@@ -955,6 +963,15 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
                if (err)
                        return err;
        }
+
+       if (tb[IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT]) {
+               u32 hlimit;
+
+               hlimit = nla_get_u32(tb[IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT]);
+               err = br_multicast_eht_set_hosts_limit(p, hlimit);
+               if (err)
+                       return err;
+       }
 #endif
 
        if (tb[IFLA_BRPORT_GROUP_FWD_MASK]) {
index d62c6e1..d242ba6 100644 (file)
@@ -252,6 +252,8 @@ struct net_bridge_port_group {
        struct timer_list               timer;
        struct timer_list               rexmit_timer;
        struct hlist_node               mglist;
+       struct rb_root                  eht_set_tree;
+       struct rb_root                  eht_host_tree;
 
        struct rhash_head               rhnode;
        struct net_bridge_mcast_gc      mcast_gc;
@@ -308,6 +310,8 @@ struct net_bridge_port {
 #if IS_ENABLED(CONFIG_IPV6)
        struct bridge_mcast_own_query   ip6_own_query;
 #endif /* IS_ENABLED(CONFIG_IPV6) */
+       u32                             multicast_eht_hosts_limit;
+       u32                             multicast_eht_hosts_cnt;
        unsigned char                   multicast_router;
        struct bridge_mcast_stats       __percpu *mcast_stats;
        struct timer_list               multicast_router_timer;
@@ -846,6 +850,10 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
                                     u8 filter_mode);
 void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
                                       struct net_bridge_port_group *sg);
+struct net_bridge_group_src *
+br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip);
+void br_multicast_del_group_src(struct net_bridge_group_src *src,
+                               bool fastleave);
 
 static inline bool br_group_is_l2(const struct br_ip *group)
 {
diff --git a/net/bridge/br_private_mcast_eht.h b/net/bridge/br_private_mcast_eht.h
new file mode 100644 (file)
index 0000000..f89049f
--- /dev/null
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright (c) 2020, Nikolay Aleksandrov <nikolay@nvidia.com>
+ */
+#ifndef _BR_PRIVATE_MCAST_EHT_H_
+#define _BR_PRIVATE_MCAST_EHT_H_
+
+#define BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT 512
+
+union net_bridge_eht_addr {
+       __be32                          ip4;
+#if IS_ENABLED(CONFIG_IPV6)
+       struct in6_addr                 ip6;
+#endif
+};
+
+/* single host's list of set entries and filter_mode */
+struct net_bridge_group_eht_host {
+       struct rb_node                  rb_node;
+
+       union net_bridge_eht_addr       h_addr;
+       struct hlist_head               set_entries;
+       unsigned int                    num_entries;
+       unsigned char                   filter_mode;
+       struct net_bridge_port_group    *pg;
+};
+
+/* (host, src entry) added to a per-src set and host's list */
+struct net_bridge_group_eht_set_entry {
+       struct rb_node                  rb_node;
+       struct hlist_node               host_list;
+
+       union net_bridge_eht_addr       h_addr;
+       struct timer_list               timer;
+       struct net_bridge               *br;
+       struct net_bridge_group_eht_set *eht_set;
+       struct net_bridge_group_eht_host *h_parent;
+       struct net_bridge_mcast_gc      mcast_gc;
+};
+
+/* per-src set */
+struct net_bridge_group_eht_set {
+       struct rb_node                  rb_node;
+
+       union net_bridge_eht_addr       src_addr;
+       struct rb_root                  entry_tree;
+       struct timer_list               timer;
+       struct net_bridge_port_group    *pg;
+       struct net_bridge               *br;
+       struct net_bridge_mcast_gc      mcast_gc;
+};
+
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+void br_multicast_eht_clean_sets(struct net_bridge_port_group *pg);
+bool br_multicast_eht_handle(struct net_bridge_port_group *pg,
+                            void *h_addr,
+                            void *srcs,
+                            u32 nsrcs,
+                            size_t addr_size,
+                            int grec_type);
+int br_multicast_eht_set_hosts_limit(struct net_bridge_port *p,
+                                    u32 eht_hosts_limit);
+
+static inline bool
+br_multicast_eht_should_del_pg(const struct net_bridge_port_group *pg)
+{
+       return !!((pg->key.port->flags & BR_MULTICAST_FAST_LEAVE) &&
+                 RB_EMPTY_ROOT(&pg->eht_host_tree));
+}
+
+static inline bool
+br_multicast_eht_hosts_over_limit(const struct net_bridge_port_group *pg)
+{
+       const struct net_bridge_port *p = pg->key.port;
+
+       return !!(p->multicast_eht_hosts_cnt >= p->multicast_eht_hosts_limit);
+}
+
+static inline void br_multicast_eht_hosts_inc(struct net_bridge_port_group *pg)
+{
+       struct net_bridge_port *p = pg->key.port;
+
+       p->multicast_eht_hosts_cnt++;
+}
+
+static inline void br_multicast_eht_hosts_dec(struct net_bridge_port_group *pg)
+{
+       struct net_bridge_port *p = pg->key.port;
+
+       p->multicast_eht_hosts_cnt--;
+}
+#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */
+
+#endif /* _BR_PRIVATE_MCAST_EHT_H_ */
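
Editor's note on the fast-leave interaction defined by this header: br_multicast_eht_should_del_pg() only returns true when the port has BR_MULTICAST_FAST_LEAVE set and the per-group host tree is empty, and the BLOCK handler in br_multicast.c then marks the entry MDB_PG_FLAGS_FAST_LEAVE and deletes it immediately instead of waiting for the group timer. A tiny standalone model of that decision, with hypothetical names standing in for the real structures:

    #include <stdbool.h>
    #include <stdio.h>

    #define FAST_LEAVE      0x1     /* stands in for BR_MULTICAST_FAST_LEAVE */

    struct port_group_model {
            unsigned int    port_flags;
            unsigned int    tracked_hosts;  /* stands in for the eht_host_tree */
    };

    /* mirrors br_multicast_eht_should_del_pg(): delete early only when the
     * port does fast-leave and no reporting host is left in the tracking tree
     */
    static bool should_del_pg(const struct port_group_model *pg)
    {
            return (pg->port_flags & FAST_LEAVE) && pg->tracked_hosts == 0;
    }

    int main(void)
    {
            struct port_group_model pg = { .port_flags = FAST_LEAVE, .tracked_hosts = 1 };

            printf("one host left: delete=%d\n", should_del_pg(&pg));
            pg.tracked_hosts = 0;   /* last host blocked or left all its sources */
            printf("no hosts left: delete=%d\n", should_del_pg(&pg));
            return 0;
    }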
index 7a59cdd..b66305f 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/sched/signal.h>
 
 #include "br_private.h"
+#include "br_private_mcast_eht.h"
 
 struct brport_attribute {
        struct attribute        attr;
@@ -245,6 +246,29 @@ static int store_multicast_router(struct net_bridge_port *p,
 static BRPORT_ATTR(multicast_router, 0644, show_multicast_router,
                   store_multicast_router);
 
+static ssize_t show_multicast_eht_hosts_limit(struct net_bridge_port *p,
+                                             char *buf)
+{
+       return sprintf(buf, "%u\n", p->multicast_eht_hosts_limit);
+}
+
+static int store_multicast_eht_hosts_limit(struct net_bridge_port *p,
+                                          unsigned long v)
+{
+       return br_multicast_eht_set_hosts_limit(p, v);
+}
+static BRPORT_ATTR(multicast_eht_hosts_limit, 0644,
+                  show_multicast_eht_hosts_limit,
+                  store_multicast_eht_hosts_limit);
+
+static ssize_t show_multicast_eht_hosts_cnt(struct net_bridge_port *p,
+                                           char *buf)
+{
+       return sprintf(buf, "%u\n", p->multicast_eht_hosts_cnt);
+}
+static BRPORT_ATTR(multicast_eht_hosts_cnt, 0444, show_multicast_eht_hosts_cnt,
+                  NULL);
+
 BRPORT_ATTR_FLAG(multicast_fast_leave, BR_MULTICAST_FAST_LEAVE);
 BRPORT_ATTR_FLAG(multicast_to_unicast, BR_MULTICAST_TO_UNICAST);
 #endif
@@ -274,6 +298,8 @@ static const struct brport_attribute *brport_attrs[] = {
        &brport_attr_multicast_router,
        &brport_attr_multicast_fast_leave,
        &brport_attr_multicast_to_unicast,
+       &brport_attr_multicast_eht_hosts_limit,
+       &brport_attr_multicast_eht_hosts_cnt,
 #endif
        &brport_attr_proxyarp,
        &brport_attr_proxyarp_wifi,
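
Editor's note on the new sysfs knobs: the per-port limit and the current tracked-host count sit next to the existing brport attributes, so they can be tuned and inspected without netlink. A short userspace sketch follows, assuming the standard /sys/class/net/<port>/brport/ layout; "swp1" is only an example port name.

    #include <stdio.h>

    /* Read the tracked-host count and raise the per-port EHT host limit
     * through the brport files added above. Writing 0 is rejected with
     * -EINVAL by br_multicast_eht_set_hosts_limit().
     */
    int main(void)
    {
            const char *cnt_path = "/sys/class/net/swp1/brport/multicast_eht_hosts_cnt";
            const char *lim_path = "/sys/class/net/swp1/brport/multicast_eht_hosts_limit";
            unsigned int cnt = 0;
            FILE *f;

            f = fopen(cnt_path, "r");
            if (f) {
                    if (fscanf(f, "%u", &cnt) == 1)
                            printf("tracked EHT hosts: %u\n", cnt);
                    fclose(f);
            }

            f = fopen(lim_path, "w");
            if (f) {
                    fprintf(f, "%u\n", 1024u);
                    fclose(f);
            }
            return 0;
    }

The same limit is exposed over netlink as IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT in the br_netlink.c changes earlier, with the count reported read-only as IFLA_BRPORT_MCAST_EHT_HOSTS_CNT.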
index 7c9958d..a9ac5ff 100644 (file)
@@ -4,7 +4,6 @@
 #
 
 menuconfig CAN
-       depends on NET
        tristate "CAN bus subsystem support"
        help
          Controller Area Network (CAN) is a slow (up to 1Mbit/s) serial
index 8598d9d..ba41248 100644 (file)
@@ -225,7 +225,7 @@ static void mod_store_ccdlc(struct canfd_frame *cf)
        if (ccf->len <= CAN_MAX_DLEN)
                return;
 
-       /* potentially broken values are catched in can_can_gw_rcv() */
+       /* potentially broken values are caught in can_can_gw_rcv() */
        if (ccf->len > CAN_MAX_RAW_DLC)
                return;
 
index d9ce02e..6df3f1b 100644 (file)
@@ -4084,7 +4084,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
        skb_reset_mac_header(skb);
 
        if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
-               __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
+               __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED);
 
        /* Disable soft irqs for various locks below. Also
         * stops preemption for RCU.
index 738d434..72ea798 100644 (file)
@@ -8617,6 +8617,10 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister);
  *     @resource_id: resource's id
  *     @parent_resource_id: resource's parent id
  *     @size_params: size parameters
+ *
+ *     Generic resources should reuse the same names across drivers.
+ *     Please see the generic resources list at:
+ *     Documentation/networking/devlink/devlink-resource.rst
  */
 int devlink_resource_register(struct devlink *devlink,
                              const char *resource_name,
index 1059786..3fba429 100644 (file)
@@ -3464,7 +3464,7 @@ static int pktgen_thread_worker(void *arg)
        struct pktgen_dev *pkt_dev = NULL;
        int cpu = t->cpu;
 
-       BUG_ON(smp_processor_id() != cpu);
+       WARN_ON(smp_processor_id() != cpu);
 
        init_waitqueue_head(&t->queue);
        complete(&t->start_done);
index 3d6ab19..c313aaf 100644 (file)
@@ -55,7 +55,7 @@
 #include <net/net_namespace.h>
 
 #define RTNL_MAX_TYPE          50
-#define RTNL_SLAVE_MAX_TYPE    36
+#define RTNL_SLAVE_MAX_TYPE    40
 
 struct rtnl_link {
        rtnl_doit_func          doit;
index 145503d..2af12f7 100644 (file)
@@ -4721,6 +4721,7 @@ err:
 EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
 
 void __skb_tstamp_tx(struct sk_buff *orig_skb,
+                    const struct sk_buff *ack_skb,
                     struct skb_shared_hwtstamps *hwtstamps,
                     struct sock *sk, int tstype)
 {
@@ -4743,7 +4744,8 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
                if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
                    sk->sk_protocol == IPPROTO_TCP &&
                    sk->sk_type == SOCK_STREAM) {
-                       skb = tcp_get_timestamping_opt_stats(sk, orig_skb);
+                       skb = tcp_get_timestamping_opt_stats(sk, orig_skb,
+                                                            ack_skb);
                        opt_stats = true;
                } else
 #endif
@@ -4772,7 +4774,7 @@ EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
 void skb_tstamp_tx(struct sk_buff *orig_skb,
                   struct skb_shared_hwtstamps *hwtstamps)
 {
-       return __skb_tstamp_tx(orig_skb, hwtstamps, orig_skb->sk,
+       return __skb_tstamp_tx(orig_skb, NULL, hwtstamps, orig_skb->sk,
                               SCM_TSTAMP_SND);
 }
 EXPORT_SYMBOL_GPL(skb_tstamp_tx);
index d86d8d1..4567de5 100644 (file)
@@ -309,7 +309,6 @@ proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write,
 #endif
 
 static struct ctl_table net_core_table[] = {
-#ifdef CONFIG_NET
        {
                .procname       = "wmem_max",
                .data           = &sysctl_wmem_max,
@@ -507,7 +506,6 @@ static struct ctl_table net_core_table[] = {
                .proc_handler   = set_default_qdisc
        },
 #endif
-#endif /* CONFIG_NET */
        {
                .procname       = "netdev_budget",
                .data           = &netdev_budget,
index 3016e5a..2c0fa16 100644 (file)
@@ -1,2 +1,2 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_DCB) += dcbnl.o dcbevent.o
+obj-y += dcbnl.o dcbevent.o
index 255df9b..155b061 100644 (file)
@@ -4,7 +4,7 @@
 #
 config DNS_RESOLVER
        tristate "DNS Resolver support"
-       depends on NET && KEYS
+       depends on KEYS
        help
          Saying Y here will include support for the DNS Resolver key type
          which can be used to make upcalls to perform DNS lookups in
index 24036e3..1812201 100644 (file)
@@ -68,6 +68,7 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
        [NETIF_F_HW_TLS_RX_BIT] =        "tls-hw-rx-offload",
        [NETIF_F_GRO_FRAGLIST_BIT] =     "rx-gro-list",
        [NETIF_F_HW_MACSEC_BIT] =        "macsec-hw-offload",
+       [NETIF_F_GRO_UDP_FWD_BIT] =      "rx-udp-gro-forwarding",
 };
 
 const char
index bcf6505..de36a5b 100644 (file)
@@ -4,7 +4,6 @@
 #
 
 menuconfig NET_IFE
-       depends on NET
        tristate "Inter-FE based on IETF ForCES InterFE LFB"
        default n
        help
index 856ae51..e1a17c6 100644 (file)
 #include <asm/ioctls.h>
 #include <net/busy_poll.h>
 
+/* Track pending CMSGs. */
+enum {
+       TCP_CMSG_INQ = 1,
+       TCP_CMSG_TS = 2
+};
+
 struct percpu_counter tcp_orphan_count;
 EXPORT_SYMBOL_GPL(tcp_orphan_count);
 
@@ -1739,6 +1745,20 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
 }
 EXPORT_SYMBOL(tcp_set_rcvlowat);
 
+static void tcp_update_recv_tstamps(struct sk_buff *skb,
+                                   struct scm_timestamping_internal *tss)
+{
+       if (skb->tstamp)
+               tss->ts[0] = ktime_to_timespec64(skb->tstamp);
+       else
+               tss->ts[0] = (struct timespec64) {0};
+
+       if (skb_hwtstamps(skb)->hwtstamp)
+               tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp);
+       else
+               tss->ts[2] = (struct timespec64) {0};
+}
+
 #ifdef CONFIG_MMU
 static const struct vm_operations_struct tcp_vm_ops = {
 };
@@ -1842,13 +1862,13 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
                              struct scm_timestamping_internal *tss,
                              int *cmsg_flags);
 static int receive_fallback_to_copy(struct sock *sk,
-                                   struct tcp_zerocopy_receive *zc, int inq)
+                                   struct tcp_zerocopy_receive *zc, int inq,
+                                   struct scm_timestamping_internal *tss)
 {
        unsigned long copy_address = (unsigned long)zc->copybuf_address;
-       struct scm_timestamping_internal tss_unused;
-       int err, cmsg_flags_unused;
        struct msghdr msg = {};
        struct iovec iov;
+       int err;
 
        zc->length = 0;
        zc->recv_skip_hint = 0;
@@ -1862,7 +1882,7 @@ static int receive_fallback_to_copy(struct sock *sk,
                return err;
 
        err = tcp_recvmsg_locked(sk, &msg, inq, /*nonblock=*/1, /*flags=*/0,
-                                &tss_unused, &cmsg_flags_unused);
+                                tss, &zc->msg_flags);
        if (err < 0)
                return err;
 
@@ -1903,21 +1923,27 @@ static int tcp_copy_straggler_data(struct tcp_zerocopy_receive *zc,
        return (__s32)copylen;
 }
 
-static int tcp_zerocopy_handle_leftover_data(struct tcp_zerocopy_receive *zc,
-                                            struct sock *sk,
-                                            struct sk_buff *skb,
-                                            u32 *seq,
-                                            s32 copybuf_len)
+static int tcp_zc_handle_leftover(struct tcp_zerocopy_receive *zc,
+                                 struct sock *sk,
+                                 struct sk_buff *skb,
+                                 u32 *seq,
+                                 s32 copybuf_len,
+                                 struct scm_timestamping_internal *tss)
 {
        u32 offset, copylen = min_t(u32, copybuf_len, zc->recv_skip_hint);
 
        if (!copylen)
                return 0;
        /* skb is null if inq < PAGE_SIZE. */
-       if (skb)
+       if (skb) {
                offset = *seq - TCP_SKB_CB(skb)->seq;
-       else
+       } else {
                skb = tcp_recv_skb(sk, *seq, &offset);
+               if (TCP_SKB_CB(skb)->has_rxtstamp) {
+                       tcp_update_recv_tstamps(skb, tss);
+                       zc->msg_flags |= TCP_CMSG_TS;
+               }
+       }
 
        zc->copybuf_len = tcp_copy_straggler_data(zc, skb, copylen, &offset,
                                                  seq);
@@ -2004,9 +2030,37 @@ static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma,
                err);
 }
 
+static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
+                              struct scm_timestamping_internal *tss);
+static void tcp_zc_finalize_rx_tstamp(struct sock *sk,
+                                     struct tcp_zerocopy_receive *zc,
+                                     struct scm_timestamping_internal *tss)
+{
+       unsigned long msg_control_addr;
+       struct msghdr cmsg_dummy;
+
+       msg_control_addr = (unsigned long)zc->msg_control;
+       cmsg_dummy.msg_control = (void *)msg_control_addr;
+       cmsg_dummy.msg_controllen =
+               (__kernel_size_t)zc->msg_controllen;
+       cmsg_dummy.msg_flags = in_compat_syscall()
+               ? MSG_CMSG_COMPAT : 0;
+       zc->msg_flags = 0;
+       if (zc->msg_control == msg_control_addr &&
+           zc->msg_controllen == cmsg_dummy.msg_controllen) {
+               tcp_recv_timestamp(&cmsg_dummy, sk, tss);
+               zc->msg_control = (__u64)
+                       ((uintptr_t)cmsg_dummy.msg_control);
+               zc->msg_controllen =
+                       (__u64)cmsg_dummy.msg_controllen;
+               zc->msg_flags = (__u32)cmsg_dummy.msg_flags;
+       }
+}
+
 #define TCP_ZEROCOPY_PAGE_BATCH_SIZE 32
 static int tcp_zerocopy_receive(struct sock *sk,
-                               struct tcp_zerocopy_receive *zc)
+                               struct tcp_zerocopy_receive *zc,
+                               struct scm_timestamping_internal *tss)
 {
        u32 length = 0, offset, vma_len, avail_len, copylen = 0;
        unsigned long address = (unsigned long)zc->address;
@@ -2023,6 +2077,7 @@ static int tcp_zerocopy_receive(struct sock *sk,
        int ret;
 
        zc->copybuf_len = 0;
+       zc->msg_flags = 0;
 
        if (address & (PAGE_SIZE - 1) || address != zc->address)
                return -EINVAL;
@@ -2033,7 +2088,7 @@ static int tcp_zerocopy_receive(struct sock *sk,
        sock_rps_record_flow(sk);
 
        if (inq && inq <= copybuf_len)
-               return receive_fallback_to_copy(sk, zc, inq);
+               return receive_fallback_to_copy(sk, zc, inq, tss);
 
        if (inq < PAGE_SIZE) {
                zc->length = 0;
@@ -2078,6 +2133,11 @@ static int tcp_zerocopy_receive(struct sock *sk,
                        } else {
                                skb = tcp_recv_skb(sk, seq, &offset);
                        }
+
+                       if (TCP_SKB_CB(skb)->has_rxtstamp) {
+                               tcp_update_recv_tstamps(skb, tss);
+                               zc->msg_flags |= TCP_CMSG_TS;
+                       }
                        zc->recv_skip_hint = skb->len - offset;
                        frags = skb_advance_to_frag(skb, offset, &offset_frag);
                        if (!frags || offset_frag)
@@ -2120,8 +2180,7 @@ out:
        mmap_read_unlock(current->mm);
        /* Try to copy straggler data. */
        if (!ret)
-               copylen = tcp_zerocopy_handle_leftover_data(zc, sk, skb, &seq,
-                                                           copybuf_len);
+               copylen = tcp_zc_handle_leftover(zc, sk, skb, &seq, copybuf_len, tss);
 
        if (length + copylen) {
                WRITE_ONCE(tp->copied_seq, seq);
@@ -2142,20 +2201,6 @@ out:
 }
 #endif
 
-static void tcp_update_recv_tstamps(struct sk_buff *skb,
-                                   struct scm_timestamping_internal *tss)
-{
-       if (skb->tstamp)
-               tss->ts[0] = ktime_to_timespec64(skb->tstamp);
-       else
-               tss->ts[0] = (struct timespec64) {0};
-
-       if (skb_hwtstamps(skb)->hwtstamp)
-               tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp);
-       else
-               tss->ts[2] = (struct timespec64) {0};
-}
-
 /* Similar to __sock_recv_timestamp, but does not require an skb */
 static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
                               struct scm_timestamping_internal *tss)
@@ -2272,7 +2317,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
                goto out;
 
        if (tp->recvmsg_inq)
-               *cmsg_flags = 1;
+               *cmsg_flags = TCP_CMSG_INQ;
        timeo = sock_rcvtimeo(sk, nonblock);
 
        /* Urgent data needs to be handled specially. */
@@ -2453,7 +2498,7 @@ skip_copy:
 
                if (TCP_SKB_CB(skb)->has_rxtstamp) {
                        tcp_update_recv_tstamps(skb, tss);
-                       *cmsg_flags |= 2;
+                       *cmsg_flags |= TCP_CMSG_TS;
                }
 
                if (used + offset < skb->len)
@@ -2513,9 +2558,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
        release_sock(sk);
 
        if (cmsg_flags && ret >= 0) {
-               if (cmsg_flags & 2)
+               if (cmsg_flags & TCP_CMSG_TS)
                        tcp_recv_timestamp(msg, sk, &tss);
-               if (cmsg_flags & 1) {
+               if (cmsg_flags & TCP_CMSG_INQ) {
                        inq = tcp_inq_hint(sk);
                        put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
                }
@@ -3767,11 +3812,24 @@ static size_t tcp_opt_stats_get_size(void)
                nla_total_size(sizeof(u16)) + /* TCP_NLA_TIMEOUT_REHASH */
                nla_total_size(sizeof(u32)) + /* TCP_NLA_BYTES_NOTSENT */
                nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_EDT */
+               nla_total_size(sizeof(u8)) + /* TCP_NLA_TTL */
                0;
 }
 
+/* Returns TTL or hop limit of an incoming packet from skb. */
+static u8 tcp_skb_ttl_or_hop_limit(const struct sk_buff *skb)
+{
+       if (skb->protocol == htons(ETH_P_IP))
+               return ip_hdr(skb)->ttl;
+       else if (skb->protocol == htons(ETH_P_IPV6))
+               return ipv6_hdr(skb)->hop_limit;
+       else
+               return 0;
+}
+
 struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
-                                              const struct sk_buff *orig_skb)
+                                              const struct sk_buff *orig_skb,
+                                              const struct sk_buff *ack_skb)
 {
        const struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *stats;
@@ -3827,6 +3885,9 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
                    max_t(int, 0, tp->write_seq - tp->snd_nxt));
        nla_put_u64_64bit(stats, TCP_NLA_EDT, orig_skb->skb_mstamp_ns,
                          TCP_NLA_PAD);
+       if (ack_skb)
+               nla_put_u8(stats, TCP_NLA_TTL,
+                          tcp_skb_ttl_or_hop_limit(ack_skb));
 
        return stats;
 }
@@ -4083,6 +4144,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
        }
 #ifdef CONFIG_MMU
        case TCP_ZEROCOPY_RECEIVE: {
+               struct scm_timestamping_internal tss;
                struct tcp_zerocopy_receive zc = {};
                int err;
 
@@ -4098,11 +4160,18 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
                if (copy_from_user(&zc, optval, len))
                        return -EFAULT;
                lock_sock(sk);
-               err = tcp_zerocopy_receive(sk, &zc);
+               err = tcp_zerocopy_receive(sk, &zc, &tss);
                release_sock(sk);
-               if (len >= offsetofend(struct tcp_zerocopy_receive, err))
-                       goto zerocopy_rcv_sk_err;
+               if (len >= offsetofend(struct tcp_zerocopy_receive, msg_flags))
+                       goto zerocopy_rcv_cmsg;
                switch (len) {
+               case offsetofend(struct tcp_zerocopy_receive, msg_flags):
+                       goto zerocopy_rcv_cmsg;
+               case offsetofend(struct tcp_zerocopy_receive, msg_controllen):
+               case offsetofend(struct tcp_zerocopy_receive, msg_control):
+               case offsetofend(struct tcp_zerocopy_receive, flags):
+               case offsetofend(struct tcp_zerocopy_receive, copybuf_len):
+               case offsetofend(struct tcp_zerocopy_receive, copybuf_address):
                case offsetofend(struct tcp_zerocopy_receive, err):
                        goto zerocopy_rcv_sk_err;
                case offsetofend(struct tcp_zerocopy_receive, inq):
@@ -4111,6 +4180,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
                default:
                        goto zerocopy_rcv_out;
                }
+zerocopy_rcv_cmsg:
+               if (zc.msg_flags & TCP_CMSG_TS)
+                       tcp_zc_finalize_rx_tstamp(sk, &zc, &tss);
+               else
+                       zc.msg_flags = 0;
 zerocopy_rcv_sk_err:
                if (!err)
                        zc.err = sock_error(sk);
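
From userspace, the new msg_control/msg_controllen/msg_flags fields let a
TCP_ZEROCOPY_RECEIVE call hand back the receive timestamps that recvmsg() would
otherwise deliver as control messages. A hedged sketch, not from the patch: the
function name and buffer sizes are illustrative, 'map' is assumed to be a page-aligned
PROT_READ mmap() of the socket, the relevant SO_TIMESTAMPING receive flags are assumed
to be enabled on the socket, and error handling is trimmed.

#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>		/* struct tcp_zerocopy_receive */

static long zc_receive_with_tstamp(int fd, void *map, size_t map_len)
{
	char cmsgbuf[128];	/* room for a timestamping cmsg */
	struct tcp_zerocopy_receive zc;
	socklen_t zc_len = sizeof(zc);
	struct msghdr msg;
	struct cmsghdr *cm;

	memset(&zc, 0, sizeof(zc));
	zc.address = (uint64_t)(uintptr_t)map;
	zc.length = map_len;
	zc.msg_control = (uint64_t)(uintptr_t)cmsgbuf;
	zc.msg_controllen = sizeof(cmsgbuf);

	if (getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len))
		return -1;

	/* Following the put_cmsg() semantics above, msg_controllen comes back
	 * as the remaining space, so the consumed length is the difference
	 * from the original buffer size.
	 */
	memset(&msg, 0, sizeof(msg));
	msg.msg_control = cmsgbuf;
	msg.msg_controllen = sizeof(cmsgbuf) - zc.msg_controllen;
	msg.msg_flags = zc.msg_flags;

	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
		if (cm->cmsg_level == SOL_SOCKET) {
			/* typically an SCM_TIMESTAMPING cmsg carrying a
			 * struct scm_timestamping in CMSG_DATA(cm), exactly
			 * as after a plain recvmsg(); which type shows up
			 * depends on the SO_TIMESTAMPING options in use.
			 */
		}
	}
	return zc.length;	/* bytes mapped into 'map' */
}
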
index a7dfca0..d4f66ab 100644 (file)
@@ -3145,7 +3145,7 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
 }
 
 static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
-                          u32 prior_snd_una)
+                          const struct sk_buff *ack_skb, u32 prior_snd_una)
 {
        const struct skb_shared_info *shinfo;
 
@@ -3157,7 +3157,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
        if (!before(shinfo->tskey, prior_snd_una) &&
            before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
                tcp_skb_tsorted_save(skb) {
-                       __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+                       __skb_tstamp_tx(skb, ack_skb, NULL, sk, SCM_TSTAMP_ACK);
                } tcp_skb_tsorted_restore(skb);
        }
 }
@@ -3166,8 +3166,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
  * is before the ack sequence we can discard it as it's confirmed to have
  * arrived at the other end.
  */
-static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
-                              u32 prior_snd_una,
+static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
+                              u32 prior_fack, u32 prior_snd_una,
                               struct tcp_sacktag_state *sack, bool ece_ack)
 {
        const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -3256,7 +3256,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
                if (!fully_acked)
                        break;
 
-               tcp_ack_tstamp(sk, skb, prior_snd_una);
+               tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una);
 
                next = skb_rb_next(skb);
                if (unlikely(skb == tp->retransmit_skb_hint))
@@ -3274,7 +3274,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
                tp->snd_up = tp->snd_una;
 
        if (skb) {
-               tcp_ack_tstamp(sk, skb, prior_snd_una);
+               tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una);
                if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
                        flag |= FLAG_SACK_RENEGING;
        }
@@ -3809,8 +3809,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
                goto no_queue;
 
        /* See if we can take anything off of the retransmit queue. */
-       flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state,
-                                   flag & FLAG_ECE);
+       flag |= tcp_clean_rtx_queue(sk, skb, prior_fack, prior_snd_una,
+                                   &sack_state, flag & FLAG_ECE);
 
        tcp_rack_update_reo_wnd(sk, &rs);
 
index 1168d18..4124970 100644 (file)
@@ -460,7 +460,8 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
        if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
                NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1;
 
-       if ((sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) {
+       if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) ||
+           (sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) {
                pp = call_gro_receive(udp_gro_receive_segment, head, skb);
                return pp;
        }
index 9edc5bb..f2337fb 100644 (file)
@@ -205,6 +205,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
        .max_desync_factor      = MAX_DESYNC_FACTOR,
        .max_addresses          = IPV6_MAX_ADDRESSES,
        .accept_ra_defrtr       = 1,
+       .ra_defrtr_metric       = IP6_RT_PRIO_USER,
        .accept_ra_from_local   = 0,
        .accept_ra_min_hop_limit= 1,
        .accept_ra_pinfo        = 1,
@@ -260,6 +261,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
        .max_desync_factor      = MAX_DESYNC_FACTOR,
        .max_addresses          = IPV6_MAX_ADDRESSES,
        .accept_ra_defrtr       = 1,
+       .ra_defrtr_metric       = IP6_RT_PRIO_USER,
        .accept_ra_from_local   = 0,
        .accept_ra_min_hop_limit= 1,
        .accept_ra_pinfo        = 1,
@@ -5476,6 +5478,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
        array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
        array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
        array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;
+       array[DEVCONF_RA_DEFRTR_METRIC] = cnf->ra_defrtr_metric;
        array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] = cnf->accept_ra_min_hop_limit;
        array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
 #ifdef CONFIG_IPV6_ROUTER_PREF
@@ -6668,6 +6671,14 @@ static const struct ctl_table addrconf_sysctl[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
+       {
+               .procname       = "ra_defrtr_metric",
+               .data           = &ipv6_devconf.ra_defrtr_metric,
+               .maxlen         = sizeof(u32),
+               .mode           = 0644,
+               .proc_handler   = proc_douintvec_minmax,
+               .extra1         = (void *)SYSCTL_ONE,
+       },
        {
                .procname       = "accept_ra_min_hop_limit",
                .data           = &ipv6_devconf.accept_ra_min_hop_limit,
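
The new knob surfaces as a per-interface ipv6 conf entry, so the metric of the
RA-installed default route can be chosen without touching the RA itself; values below 1
are rejected (extra1 is SYSCTL_ONE) and the default stays IP6_RT_PRIO_USER (1024). A
small sketch of setting it from userspace; the interface name and metric are examples.

#include <stdio.h>

static int set_ra_defrtr_metric(const char *ifname, unsigned int metric)
{
	char path[128];
	FILE *f;

	/* per-device ipv6 conf entries live under /proc/sys/net/ipv6/conf/ */
	snprintf(path, sizeof(path),
		 "/proc/sys/net/ipv6/conf/%s/ra_defrtr_metric", ifname);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%u\n", metric);
	return fclose(f) ? -1 : 0;
}
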
index 7671747..c467c64 100644 (file)
@@ -1173,6 +1173,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
        struct neighbour *neigh = NULL;
        struct inet6_dev *in6_dev;
        struct fib6_info *rt = NULL;
+       u32 defrtr_usr_metric;
        struct net *net;
        int lifetime;
        struct ndisc_options ndopts;
@@ -1303,18 +1304,21 @@ static void ndisc_router_discovery(struct sk_buff *skb)
                        return;
                }
        }
-       if (rt && lifetime == 0) {
+       /* Set default route metric as specified by user */
+       defrtr_usr_metric = in6_dev->cnf.ra_defrtr_metric;
+       /* delete the route if lifetime is 0 or if the metric needs to change */
+       if (rt && (lifetime == 0 || rt->fib6_metric != defrtr_usr_metric)) {
                ip6_del_rt(net, rt, false);
                rt = NULL;
        }
 
-       ND_PRINTK(3, info, "RA: rt: %p  lifetime: %d, for dev: %s\n",
-                 rt, lifetime, skb->dev->name);
+       ND_PRINTK(3, info, "RA: rt: %p  lifetime: %d, metric: %d, for dev: %s\n",
+                 rt, lifetime, defrtr_usr_metric, skb->dev->name);
        if (!rt && lifetime) {
                ND_PRINTK(3, info, "RA: adding default router\n");
 
                rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr,
-                                        skb->dev, pref);
+                                        skb->dev, pref, defrtr_usr_metric);
                if (!rt) {
                        ND_PRINTK(0, err,
                                  "RA: %s failed to add default route\n",
index 188e114..41d8f80 100644 (file)
@@ -4252,11 +4252,12 @@ struct fib6_info *rt6_get_dflt_router(struct net *net,
 struct fib6_info *rt6_add_dflt_router(struct net *net,
                                     const struct in6_addr *gwaddr,
                                     struct net_device *dev,
-                                    unsigned int pref)
+                                    unsigned int pref,
+                                    u32 defrtr_usr_metric)
 {
        struct fib6_config cfg = {
                .fc_table       = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
-               .fc_metric      = IP6_RT_PRIO_USER,
+               .fc_metric      = defrtr_usr_metric,
                .fc_ifindex     = dev->ifindex,
                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
                                  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
index 59755a9..9e7da0a 100644 (file)
@@ -3,4 +3,4 @@
 # Makefile for the L3 device API
 #
 
-obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev.o
+obj-y += l3mdev.o
index b0e646a..7f79f5e 100644 (file)
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config LLC
        tristate
-       depends on NET
 
 config LLC2
        tristate "ANSI/IEEE 802.2 LLC type 2 Support"
index 9b1f629..83976b9 100644 (file)
@@ -60,15 +60,20 @@ static bool addresses_equal(const struct mptcp_addr_info *a,
 {
        bool addr_equals = false;
 
-       if (a->family != b->family)
-               return false;
-
-       if (a->family == AF_INET)
-               addr_equals = a->addr.s_addr == b->addr.s_addr;
+       if (a->family == b->family) {
+               if (a->family == AF_INET)
+                       addr_equals = a->addr.s_addr == b->addr.s_addr;
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
-       else
-               addr_equals = !ipv6_addr_cmp(&a->addr6, &b->addr6);
+               else
+                       addr_equals = !ipv6_addr_cmp(&a->addr6, &b->addr6);
+       } else if (a->family == AF_INET) {
+               if (ipv6_addr_v4mapped(&b->addr6))
+                       addr_equals = a->addr.s_addr == b->addr6.s6_addr32[3];
+       } else if (b->family == AF_INET) {
+               if (ipv6_addr_v4mapped(&a->addr6))
+                       addr_equals = a->addr6.s6_addr32[3] == b->addr.s_addr;
 #endif
+       }
 
        if (!addr_equals)
                return false;
@@ -137,6 +142,7 @@ select_local_address(const struct pm_nl_pernet *pernet,
                     struct mptcp_sock *msk)
 {
        struct mptcp_pm_addr_entry *entry, *ret = NULL;
+       struct sock *sk = (struct sock *)msk;
 
        rcu_read_lock();
        __mptcp_flush_join_list(msk);
@@ -144,11 +150,20 @@ select_local_address(const struct pm_nl_pernet *pernet,
                if (!(entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
                        continue;
 
+               if (entry->addr.family != sk->sk_family) {
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+                       if ((entry->addr.family == AF_INET &&
+                            !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) ||
+                           (sk->sk_family == AF_INET &&
+                            !ipv6_addr_v4mapped(&entry->addr.addr6)))
+#endif
+                               continue;
+               }
+
                /* avoid any address already in use by subflows and
                 * pending join
                 */
-               if (entry->addr.family == ((struct sock *)msk)->sk_family &&
-                   !lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) {
+               if (!lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) {
                        ret = entry;
                        break;
                }
@@ -310,7 +325,6 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
 
 static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
 {
-       struct mptcp_addr_info remote = { 0 };
        struct sock *sk = (struct sock *)msk;
        struct mptcp_pm_addr_entry *local;
        struct pm_nl_pernet *pernet;
@@ -344,13 +358,14 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
        /* check if should create a new subflow */
        if (msk->pm.local_addr_used < msk->pm.local_addr_max &&
            msk->pm.subflows < msk->pm.subflows_max) {
-               remote_address((struct sock_common *)sk, &remote);
-
                local = select_local_address(pernet, msk);
                if (local) {
+                       struct mptcp_addr_info remote = { 0 };
+
                        msk->pm.local_addr_used++;
                        msk->pm.subflows++;
                        check_work_pending(msk);
+                       remote_address((struct sock_common *)sk, &remote);
                        spin_unlock_bh(&msk->pm.lock);
                        __mptcp_subflow_connect(sk, &local->addr, &remote);
                        spin_lock_bh(&msk->pm.lock);
index f998a07..a033bf9 100644 (file)
@@ -45,6 +45,9 @@ static struct percpu_counter mptcp_sockets_allocated;
 static void __mptcp_destroy_sock(struct sock *sk);
 static void __mptcp_check_send_data_fin(struct sock *sk);
 
+DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
+static struct net_device mptcp_napi_dev;
+
 /* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not
  * completed yet or has failed, return the subflow socket.
  * Otherwise return NULL.
@@ -114,11 +117,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
        list_add(&subflow->node, &msk->conn_list);
        sock_hold(ssock->sk);
        subflow->request_mptcp = 1;
-
-       /* accept() will wait on first subflow sk_wq, and we always wakes up
-        * via msk->sk_socket
-        */
-       RCU_INIT_POINTER(msk->first->sk_wq, &sk->sk_socket->wq);
+       mptcp_sock_graft(msk->first, sk->sk_socket);
 
        return 0;
 }
@@ -734,10 +733,14 @@ wake:
 
 void __mptcp_flush_join_list(struct mptcp_sock *msk)
 {
+       struct mptcp_subflow_context *subflow;
+
        if (likely(list_empty(&msk->join_list)))
                return;
 
        spin_lock_bh(&msk->join_list_lock);
+       list_for_each_entry(subflow, &msk->join_list, node)
+               mptcp_propagate_sndbuf((struct sock *)msk, mptcp_subflow_tcp_sock(subflow));
        list_splice_tail_init(&msk->join_list, &msk->conn_list);
        spin_unlock_bh(&msk->join_list_lock);
 }
@@ -1037,13 +1040,6 @@ out:
                        __mptcp_update_wmem(sk);
                        sk_mem_reclaim_partial(sk);
                }
-
-               if (sk_stream_is_writeable(sk)) {
-                       /* pairs with memory barrier in mptcp_poll */
-                       smp_mb();
-                       if (test_and_clear_bit(MPTCP_NOSPACE, &msk->flags))
-                               sk_stream_write_space(sk);
-               }
        }
 
        if (snd_una == READ_ONCE(msk->snd_nxt)) {
@@ -1362,8 +1358,7 @@ struct subflow_send_info {
        u64 ratio;
 };
 
-static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk,
-                                          u32 *sndbuf)
+static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
 {
        struct subflow_send_info send_info[2];
        struct mptcp_subflow_context *subflow;
@@ -1374,24 +1369,17 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk,
 
        sock_owned_by_me((struct sock *)msk);
 
-       *sndbuf = 0;
        if (__mptcp_check_fallback(msk)) {
                if (!msk->first)
                        return NULL;
-               *sndbuf = msk->first->sk_sndbuf;
                return sk_stream_memory_free(msk->first) ? msk->first : NULL;
        }
 
        /* re-use last subflow, if the burst allow that */
        if (msk->last_snd && msk->snd_burst > 0 &&
            sk_stream_memory_free(msk->last_snd) &&
-           mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) {
-               mptcp_for_each_subflow(msk, subflow) {
-                       ssk =  mptcp_subflow_tcp_sock(subflow);
-                       *sndbuf = max(tcp_sk(ssk)->snd_wnd, *sndbuf);
-               }
+           mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd)))
                return msk->last_snd;
-       }
 
        /* pick the subflow with the lower wmem/wspace ratio */
        for (i = 0; i < 2; ++i) {
@@ -1404,8 +1392,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk,
                        continue;
 
                nr_active += !subflow->backup;
-               *sndbuf = max(tcp_sk(ssk)->snd_wnd, *sndbuf);
-               if (!sk_stream_memory_free(subflow->tcp_sock))
+               if (!sk_stream_memory_free(subflow->tcp_sock) || !tcp_sk(ssk)->snd_wnd)
                        continue;
 
                pace = READ_ONCE(ssk->sk_pacing_rate);
@@ -1431,9 +1418,10 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk,
        if (send_info[0].ssk) {
                msk->last_snd = send_info[0].ssk;
                msk->snd_burst = min_t(int, MPTCP_SEND_BURST_SIZE,
-                                      sk_stream_wspace(msk->last_snd));
+                                      tcp_sk(msk->last_snd)->snd_wnd);
                return msk->last_snd;
        }
+
        return NULL;
 }
 
@@ -1454,7 +1442,6 @@ static void mptcp_push_pending(struct sock *sk, unsigned int flags)
        };
        struct mptcp_data_frag *dfrag;
        int len, copied = 0;
-       u32 sndbuf;
 
        while ((dfrag = mptcp_send_head(sk))) {
                info.sent = dfrag->already_sent;
@@ -1465,12 +1452,7 @@ static void mptcp_push_pending(struct sock *sk, unsigned int flags)
 
                        prev_ssk = ssk;
                        __mptcp_flush_join_list(msk);
-                       ssk = mptcp_subflow_get_send(msk, &sndbuf);
-
-                       /* do auto tuning */
-                       if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK) &&
-                           sndbuf > READ_ONCE(sk->sk_sndbuf))
-                               WRITE_ONCE(sk->sk_sndbuf, sndbuf);
+                       ssk = mptcp_subflow_get_send(msk);
 
                        /* try to keep the subflow socket lock across
                         * consecutive xmit on the same socket
@@ -1527,7 +1509,9 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
        struct mptcp_sock *msk = mptcp_sk(sk);
        struct mptcp_sendmsg_info info;
        struct mptcp_data_frag *dfrag;
+       struct sock *xmit_ssk;
        int len, copied = 0;
+       bool first = true;
 
        info.flags = 0;
        while ((dfrag = mptcp_send_head(sk))) {
@@ -1537,10 +1521,17 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
                while (len > 0) {
                        int ret = 0;
 
-                       /* do auto tuning */
-                       if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK) &&
-                           ssk->sk_sndbuf > READ_ONCE(sk->sk_sndbuf))
-                               WRITE_ONCE(sk->sk_sndbuf, ssk->sk_sndbuf);
+                       /* the caller already invoked the packet scheduler,
+                        * check for a different subflow usage only after
+                        * spooling the first chunk of data
+                        */
+                       xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk));
+                       if (!xmit_ssk)
+                               goto out;
+                       if (xmit_ssk != ssk) {
+                               mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk));
+                               goto out;
+                       }
 
                        if (unlikely(mptcp_must_reclaim_memory(sk, ssk))) {
                                __mptcp_update_wmem(sk);
@@ -1560,6 +1551,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
                        msk->tx_pending_data -= ret;
                        copied += ret;
                        len -= ret;
+                       first = false;
                }
                WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
        }
@@ -1579,6 +1571,15 @@ out:
        }
 }
 
+static void mptcp_set_nospace(struct sock *sk)
+{
+       /* enable autotune */
+       set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+
+       /* will be cleared on avail space */
+       set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags);
+}
+
 static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
        struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1680,7 +1681,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                continue;
 
 wait_for_memory:
-               set_bit(MPTCP_NOSPACE, &msk->flags);
+               mptcp_set_nospace(sk);
                mptcp_push_pending(sk, msg->msg_flags);
                ret = sk_stream_wait_memory(sk, &timeo);
                if (ret)
@@ -2116,9 +2117,6 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
 void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
                       struct mptcp_subflow_context *subflow)
 {
-       bool dispose_socket = false;
-       struct socket *sock;
-
        list_del(&subflow->node);
 
        lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
@@ -2126,11 +2124,8 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
        /* if we are invoked by the msk cleanup code, the subflow is
         * already orphaned
         */
-       sock = ssk->sk_socket;
-       if (sock) {
-               dispose_socket = sock != sk->sk_socket;
+       if (ssk->sk_socket)
                sock_orphan(ssk);
-       }
 
        subflow->disposable = 1;
 
@@ -2148,8 +2143,6 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
                __sock_put(ssk);
        }
        release_sock(ssk);
-       if (dispose_socket)
-               iput(SOCK_INODE(sock));
 
        sock_put(ssk);
 }
@@ -2536,6 +2529,12 @@ static void __mptcp_destroy_sock(struct sock *sk)
 
        pr_debug("msk=%p", msk);
 
+       /* dispose of the ancillary tcp socket, if any */
+       if (msk->subflow) {
+               iput(SOCK_INODE(msk->subflow));
+               msk->subflow = NULL;
+       }
+
        /* be sure to always acquire the join list lock, to sync vs
         * mptcp_finish_join().
         */
@@ -2586,20 +2585,10 @@ cleanup:
        inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
        list_for_each_entry(subflow, &mptcp_sk(sk)->conn_list, node) {
                struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-               bool slow, dispose_socket;
-               struct socket *sock;
+               bool slow = lock_sock_fast(ssk);
 
-               slow = lock_sock_fast(ssk);
-               sock = ssk->sk_socket;
-               dispose_socket = sock && sock != sk->sk_socket;
                sock_orphan(ssk);
                unlock_sock_fast(ssk, slow);
-
-               /* for the outgoing subflows we additionally need to free
-                * the associated socket
-                */
-               if (dispose_socket)
-                       iput(SOCK_INODE(sock));
        }
        sock_orphan(sk);
 
@@ -2928,10 +2917,16 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
        if (!mptcp_send_head(sk))
                return;
 
-       if (!sock_owned_by_user(sk))
-               __mptcp_subflow_push_pending(sk, ssk);
-       else
+       if (!sock_owned_by_user(sk)) {
+               struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk));
+
+               if (xmit_ssk == ssk)
+                       __mptcp_subflow_push_pending(sk, ssk);
+               else if (xmit_ssk)
+                       mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk));
+       } else {
                set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
+       }
 }
 
 #define MPTCP_DEFERRED_ALL (TCPF_WRITE_TIMER_DEFERRED)
@@ -2979,6 +2974,20 @@ static void mptcp_release_cb(struct sock *sk)
        }
 }
 
+void mptcp_subflow_process_delegated(struct sock *ssk)
+{
+       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+       struct sock *sk = subflow->conn;
+
+       mptcp_data_lock(sk);
+       if (!sock_owned_by_user(sk))
+               __mptcp_subflow_push_pending(sk, ssk);
+       else
+               set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
+       mptcp_data_unlock(sk);
+       mptcp_subflow_delegated_done(subflow);
+}
+
 static int mptcp_hash(struct sock *sk)
 {
        /* should never be called,
@@ -3041,7 +3050,7 @@ void mptcp_finish_connect(struct sock *ssk)
        mptcp_rcv_space_init(msk, ssk);
 }
 
-static void mptcp_sock_graft(struct sock *sk, struct socket *parent)
+void mptcp_sock_graft(struct sock *sk, struct socket *parent)
 {
        write_lock_bh(&sk->sk_callback_lock);
        rcu_assign_pointer(sk->sk_wq, &parent->wq);
@@ -3284,6 +3293,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
 
                mptcp_copy_inaddrs(newsk, msk->first);
                mptcp_rcv_space_init(msk, msk->first);
+               mptcp_propagate_sndbuf(newsk, msk->first);
 
                /* set ssk->sk_socket of accept()ed flows to mptcp socket.
                 * This is needed so NOSPACE flag can be set from tcp stack.
@@ -3324,7 +3334,7 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
        if (sk_stream_is_writeable(sk))
                return EPOLLOUT | EPOLLWRNORM;
 
-       set_bit(MPTCP_NOSPACE, &msk->flags);
+       mptcp_set_nospace(sk);
        smp_mb__after_atomic(); /* msk->flags is changed by write_space cb */
        if (sk_stream_is_writeable(sk))
                return EPOLLOUT | EPOLLWRNORM;
@@ -3388,13 +3398,58 @@ static struct inet_protosw mptcp_protosw = {
        .flags          = INET_PROTOSW_ICSK,
 };
 
+static int mptcp_napi_poll(struct napi_struct *napi, int budget)
+{
+       struct mptcp_delegated_action *delegated;
+       struct mptcp_subflow_context *subflow;
+       int work_done = 0;
+
+       delegated = container_of(napi, struct mptcp_delegated_action, napi);
+       while ((subflow = mptcp_subflow_delegated_next(delegated)) != NULL) {
+               struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+               bh_lock_sock_nested(ssk);
+               if (!sock_owned_by_user(ssk) &&
+                   mptcp_subflow_has_delegated_action(subflow))
+                       mptcp_subflow_process_delegated(ssk);
+               /* ... elsewhere tcp_release_cb_override already processed
+                * the action or will do at next release_sock().
+                * In both cases we must dequeue the subflow here - on the
+                * same CPU that scheduled it.
+                */
+               bh_unlock_sock(ssk);
+               sock_put(ssk);
+
+               if (++work_done == budget)
+                       return budget;
+       }
+
+       /* always provide a 0 'work_done' argument, so that napi_complete_done
+        * will not try accessing the NULL napi->dev ptr
+        */
+       napi_complete_done(napi, 0);
+       return work_done;
+}
+
 void __init mptcp_proto_init(void)
 {
+       struct mptcp_delegated_action *delegated;
+       int cpu;
+
        mptcp_prot.h.hashinfo = tcp_prot.h.hashinfo;
 
        if (percpu_counter_init(&mptcp_sockets_allocated, 0, GFP_KERNEL))
                panic("Failed to allocate MPTCP pcpu counter\n");
 
+       init_dummy_netdev(&mptcp_napi_dev);
+       for_each_possible_cpu(cpu) {
+               delegated = per_cpu_ptr(&mptcp_delegated_actions, cpu);
+               INIT_LIST_HEAD(&delegated->head);
+               netif_tx_napi_add(&mptcp_napi_dev, &delegated->napi, mptcp_napi_poll,
+                                 NAPI_POLL_WEIGHT);
+               napi_enable(&delegated->napi);
+       }
+
        mptcp_subflow_init();
        mptcp_pm_init();
        mptcp_token_init();
index d6400ad..1460705 100644 (file)
@@ -378,6 +378,15 @@ enum mptcp_data_avail {
        MPTCP_SUBFLOW_OOO_DATA
 };
 
+struct mptcp_delegated_action {
+       struct napi_struct napi;
+       struct list_head head;
+};
+
+DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
+
+#define MPTCP_DELEGATE_SEND            0
+
 /* MPTCP subflow context */
 struct mptcp_subflow_context {
        struct  list_head node;/* conn_list of subflows */
@@ -415,6 +424,9 @@ struct mptcp_subflow_context {
        u8      local_id;
        u8      remote_id;
 
+       long    delegated_status;
+       struct  list_head delegated_node;   /* link into delegated_action, protected by local BH */
+
        struct  sock *tcp_sock;     /* tcp sk backpointer */
        struct  sock *conn;         /* parent mptcp_sock */
        const   struct inet_connection_sock_af_ops *icsk_af_ops;
@@ -463,6 +475,61 @@ static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk,
        spin_unlock_bh(&msk->join_list_lock);
 }
 
+void mptcp_subflow_process_delegated(struct sock *ssk);
+
+static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow)
+{
+       struct mptcp_delegated_action *delegated;
+       bool schedule;
+
+       /* The implied barrier pairs with mptcp_subflow_delegated_done(), and
+        * ensures the below list check sees list updates done prior to status
+        * bit changes
+        */
+       if (!test_and_set_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
+               /* still on delegated list from previous scheduling */
+               if (!list_empty(&subflow->delegated_node))
+                       return;
+
+               /* the caller held the subflow bh socket lock */
+               lockdep_assert_in_softirq();
+
+               delegated = this_cpu_ptr(&mptcp_delegated_actions);
+               schedule = list_empty(&delegated->head);
+               list_add_tail(&subflow->delegated_node, &delegated->head);
+               sock_hold(mptcp_subflow_tcp_sock(subflow));
+               if (schedule)
+                       napi_schedule(&delegated->napi);
+       }
+}
+
+static inline struct mptcp_subflow_context *
+mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated)
+{
+       struct mptcp_subflow_context *ret;
+
+       if (list_empty(&delegated->head))
+               return NULL;
+
+       ret = list_first_entry(&delegated->head, struct mptcp_subflow_context, delegated_node);
+       list_del_init(&ret->delegated_node);
+       return ret;
+}
+
+static inline bool mptcp_subflow_has_delegated_action(const struct mptcp_subflow_context *subflow)
+{
+       return test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
+}
+
+static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow)
+{
+       /* pairs with mptcp_subflow_delegate, ensures delegated_node is updated before
+        * touching the status bit
+        */
+       smp_wmb();
+       clear_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
+}
+
 int mptcp_is_enabled(struct net *net);
 unsigned int mptcp_get_add_addr_timeout(struct net *net);
 void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
@@ -473,6 +540,7 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
 void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
                       struct mptcp_subflow_context *subflow);
 void mptcp_subflow_reset(struct sock *ssk);
+void mptcp_sock_graft(struct sock *sk, struct socket *parent);
 
 /* called with sk socket lock held */
 int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
@@ -521,6 +589,25 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
               READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
 }
 
+static inline bool mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
+{
+       if ((sk->sk_userlocks & SOCK_SNDBUF_LOCK) || ssk->sk_sndbuf <= READ_ONCE(sk->sk_sndbuf))
+               return false;
+
+       WRITE_ONCE(sk->sk_sndbuf, ssk->sk_sndbuf);
+       return true;
+}
+
+static inline void mptcp_write_space(struct sock *sk)
+{
+       if (sk_stream_is_writeable(sk)) {
+               /* pairs with memory barrier in mptcp_poll */
+               smp_mb();
+               if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
+                       sk_stream_write_space(sk);
+       }
+}
+
 void mptcp_destroy_common(struct mptcp_sock *msk);
 
 void __init mptcp_token_init(void);
index 278cbe3..5861562 100644 (file)
 #include <net/tcp.h>
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 #include <net/ip6_route.h>
+#include <net/transp_v6.h>
 #endif
 #include <net/mptcp.h>
 #include <uapi/linux/mptcp.h>
 #include "protocol.h"
 #include "mib.h"
 
+static void mptcp_subflow_ops_undo_override(struct sock *ssk);
+
 static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
                                  enum linux_mptcp_mib_field field)
 {
@@ -343,6 +346,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
        if (subflow->conn_finished)
                return;
 
+       mptcp_propagate_sndbuf(parent, sk);
        subflow->rel_write_seq = 1;
        subflow->conn_finished = 1;
        subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
@@ -427,6 +431,7 @@ drop:
 static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
 static struct inet_connection_sock_af_ops subflow_v6_specific;
 static struct inet_connection_sock_af_ops subflow_v6m_specific;
+static struct proto tcpv6_prot_override;
 
 static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
@@ -508,6 +513,8 @@ static void subflow_ulp_fallback(struct sock *sk,
        icsk->icsk_ulp_ops = NULL;
        rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
        tcp_sk(sk)->is_mptcp = 0;
+
+       mptcp_subflow_ops_undo_override(sk);
 }
 
 static void subflow_drop_ctx(struct sock *ssk)
@@ -681,6 +688,7 @@ dispose_child:
 }
 
 static struct inet_connection_sock_af_ops subflow_specific;
+static struct proto tcp_prot_override;
 
 enum mapping_status {
        MAPPING_OK,
@@ -1040,7 +1048,10 @@ static void subflow_data_ready(struct sock *sk)
 
 static void subflow_write_space(struct sock *ssk)
 {
-       /* we take action in __mptcp_clean_una() */
+       struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
+
+       mptcp_propagate_sndbuf(sk, ssk);
+       mptcp_write_space(sk);
 }
 
 static struct inet_connection_sock_af_ops *
@@ -1074,21 +1085,31 @@ void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
 #endif
 
 static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
-                               struct sockaddr_storage *addr)
+                               struct sockaddr_storage *addr,
+                               unsigned short family)
 {
        memset(addr, 0, sizeof(*addr));
-       addr->ss_family = info->family;
+       addr->ss_family = family;
        if (addr->ss_family == AF_INET) {
                struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;
 
-               in_addr->sin_addr = info->addr;
+               if (info->family == AF_INET)
+                       in_addr->sin_addr = info->addr;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+               else if (ipv6_addr_v4mapped(&info->addr6))
+                       in_addr->sin_addr.s_addr = info->addr6.s6_addr32[3];
+#endif
                in_addr->sin_port = info->port;
        }
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
        else if (addr->ss_family == AF_INET6) {
                struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr;
 
-               in6_addr->sin6_addr = info->addr6;
+               if (info->family == AF_INET)
+                       ipv6_addr_set_v4mapped(info->addr.s_addr,
+                                              &in6_addr->sin6_addr);
+               else
+                       in6_addr->sin6_addr = info->addr6;
                in6_addr->sin6_port = info->port;
        }
 #endif
@@ -1132,11 +1153,11 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
        subflow->remote_key = msk->remote_key;
        subflow->local_key = msk->local_key;
        subflow->token = msk->token;
-       mptcp_info2sockaddr(loc, &addr);
+       mptcp_info2sockaddr(loc, &addr, ssk->sk_family);
 
        addrlen = sizeof(struct sockaddr_in);
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
-       if (loc->family == AF_INET6)
+       if (addr.ss_family == AF_INET6)
                addrlen = sizeof(struct sockaddr_in6);
 #endif
        ssk->sk_bound_dev_if = loc->ifindex;
@@ -1152,13 +1173,16 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
        subflow->remote_id = remote_id;
        subflow->request_join = 1;
        subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
-       mptcp_info2sockaddr(remote, &addr);
+       mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
 
        mptcp_add_pending_subflow(msk, subflow);
        err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
        if (err && err != -EINPROGRESS)
                goto failed_unlink;
 
+       /* discard the subflow socket */
+       mptcp_sock_graft(ssk, sk->sk_socket);
+       iput(SOCK_INODE(sf));
        return err;
 
 failed_unlink:
@@ -1196,6 +1220,25 @@ static void mptcp_attach_cgroup(struct sock *parent, struct sock *child)
 #endif /* CONFIG_SOCK_CGROUP_DATA */
 }
 
+static void mptcp_subflow_ops_override(struct sock *ssk)
+{
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+       if (ssk->sk_prot == &tcpv6_prot)
+               ssk->sk_prot = &tcpv6_prot_override;
+       else
+#endif
+               ssk->sk_prot = &tcp_prot_override;
+}
+
+static void mptcp_subflow_ops_undo_override(struct sock *ssk)
+{
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+       if (ssk->sk_prot == &tcpv6_prot_override)
+               ssk->sk_prot = &tcpv6_prot;
+       else
+#endif
+               ssk->sk_prot = &tcp_prot;
+}
 int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
 {
        struct mptcp_subflow_context *subflow;
@@ -1251,6 +1294,7 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
        *new_sock = sf;
        sock_hold(sk);
        subflow->conn = sk;
+       mptcp_subflow_ops_override(sf->sk);
 
        return 0;
 }
@@ -1267,6 +1311,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
 
        rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
        INIT_LIST_HEAD(&ctx->node);
+       INIT_LIST_HEAD(&ctx->delegated_node);
 
        pr_debug("subflow=%p", ctx);
 
@@ -1299,6 +1344,7 @@ static void subflow_state_change(struct sock *sk)
        __subflow_state_change(sk);
 
        if (subflow_simultaneous_connect(sk)) {
+               mptcp_propagate_sndbuf(parent, sk);
                mptcp_do_fallback(sk);
                mptcp_rcv_space_init(mptcp_sk(parent), sk);
                pr_fallback(mptcp_sk(parent));
@@ -1378,6 +1424,7 @@ static void subflow_ulp_release(struct sock *ssk)
                sock_put(sk);
        }
 
+       mptcp_subflow_ops_undo_override(ssk);
        if (release)
                kfree_rcu(ctx, rcu);
 }
@@ -1431,6 +1478,16 @@ static void subflow_ulp_clone(const struct request_sock *req,
        }
 }
 
+static void tcp_release_cb_override(struct sock *ssk)
+{
+       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+
+       if (mptcp_subflow_has_delegated_action(subflow))
+               mptcp_subflow_process_delegated(ssk);
+
+       tcp_release_cb(ssk);
+}
+
 static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
        .name           = "mptcp",
        .owner          = THIS_MODULE,
@@ -1471,6 +1528,9 @@ void __init mptcp_subflow_init(void)
        subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
        subflow_specific.sk_rx_dst_set = subflow_finish_connect;
 
+       tcp_prot_override = tcp_prot;
+       tcp_prot_override.release_cb = tcp_release_cb_override;
+
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
        subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
        subflow_request_sock_ipv6_ops.route_req = subflow_v6_route_req;
@@ -1486,6 +1546,9 @@ void __init mptcp_subflow_init(void)
        subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
        subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
        subflow_v6m_specific.net_frag_header_len = 0;
+
+       tcpv6_prot_override = tcpv6_prot;
+       tcpv6_prot_override.release_cb = tcp_release_cb_override;
 #endif
 
        mptcp_diag_subflow_init(&subflow_ulp_ops);
index 49fbef0..1a92063 100644 (file)
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 menu "Core Netfilter Configuration"
-       depends on NET && INET && NETFILTER
+       depends on INET && NETFILTER
 
 config NETFILTER_INGRESS
        bool "Netfilter ingress support"
index eb0e329..c39a1e3 100644 (file)
@@ -4,7 +4,7 @@
 #
 menuconfig IP_VS
        tristate "IP virtual server support"
-       depends on NET && INET && NETFILTER
+       depends on INET && NETFILTER
        depends on (NF_CONNTRACK || NF_CONNTRACK=n)
        help
          IP Virtual Server support will let you build a high-performance
index 96b9167..466a027 100644 (file)
@@ -4,7 +4,6 @@
 #
 
 menuconfig NFC
-       depends on NET
        depends on RFKILL || !RFKILL
        tristate "NFC subsystem support"
        default n
index 028f514..be0b839 100644 (file)
@@ -4,7 +4,6 @@
 #
 
 menuconfig PSAMPLE
-       depends on NET
        tristate "Packet-sampling netlink channel"
        default n
        help
index 6fe4e5c..e2e4353 100644 (file)
@@ -1866,7 +1866,8 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
 static int tclass_del_notify(struct net *net,
                             const struct Qdisc_class_ops *cops,
                             struct sk_buff *oskb, struct nlmsghdr *n,
-                            struct Qdisc *q, unsigned long cl)
+                            struct Qdisc *q, unsigned long cl,
+                            struct netlink_ext_ack *extack)
 {
        u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
        struct sk_buff *skb;
@@ -1885,7 +1886,7 @@ static int tclass_del_notify(struct net *net,
                return -EINVAL;
        }
 
-       err = cops->delete(q, cl);
+       err = cops->delete(q, cl, extack);
        if (err) {
                kfree_skb(skb);
                return err;
@@ -2088,7 +2089,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
                                goto out;
                        break;
                case RTM_DELTCLASS:
-                       err = tclass_del_notify(net, cops, skb, n, q, cl);
+                       err = tclass_del_notify(net, cops, skb, n, q, cl, extack);
                        /* Unbind the class from filters with 0 */
                        tc_bind_tclass(q, portid, clid, 0);
                        goto out;
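
With the extra parameter, a class ->delete() implementation can attach a human-readable
reason to the netlink extack instead of returning a bare errno. A hedged sketch for a
hypothetical qdisc; struct foo_class and the filter check are illustrative, not taken
from any in-tree scheduler.

struct foo_class {
	struct Qdisc_class_common common;
	unsigned int filter_cnt;
};

static int foo_delete_class(struct Qdisc *sch, unsigned long arg,
			    struct netlink_ext_ack *extack)
{
	struct foo_class *cl = (struct foo_class *)arg;

	if (cl->filter_cnt) {
		NL_SET_ERR_MSG(extack, "Class is still bound to filters");
		return -EBUSY;
	}
	/* ... unlink the class from the hash and free it as before ... */
	return 0;
}
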
index 007bd2d..d0c9a57 100644 (file)
@@ -320,7 +320,8 @@ err_out:
        return error;
 }
 
-static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
+static int atm_tc_delete(struct Qdisc *sch, unsigned long arg,
+                        struct netlink_ext_ack *extack)
 {
        struct atm_qdisc_data *p = qdisc_priv(sch);
        struct atm_flow_data *flow = (struct atm_flow_data *)arg;
index 53d45e0..320b3d3 100644 (file)
@@ -1675,7 +1675,8 @@ failure:
        return err;
 }
 
-static int cbq_delete(struct Qdisc *sch, unsigned long arg)
+static int cbq_delete(struct Qdisc *sch, unsigned long arg,
+                     struct netlink_ext_ack *extack)
 {
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl = (struct cbq_class *)arg;
index dde5646..fc1e470 100644 (file)
@@ -146,7 +146,8 @@ static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl)
        kfree(cl);
 }
 
-static int drr_delete_class(struct Qdisc *sch, unsigned long arg)
+static int drr_delete_class(struct Qdisc *sch, unsigned long arg,
+                           struct netlink_ext_ack *extack)
 {
        struct drr_sched *q = qdisc_priv(sch);
        struct drr_class *cl = (struct drr_class *)arg;
index 2b88710..cd2748e 100644 (file)
@@ -150,7 +150,8 @@ errout:
        return err;
 }
 
-static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
+static int dsmark_delete(struct Qdisc *sch, unsigned long arg,
+                        struct netlink_ext_ack *extack)
 {
        struct dsmark_qdisc_data *p = qdisc_priv(sch);
 
index d1902fc..bf0034c 100644 (file)
@@ -1090,7 +1090,8 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
 }
 
 static int
-hfsc_delete_class(struct Qdisc *sch, unsigned long arg)
+hfsc_delete_class(struct Qdisc *sch, unsigned long arg,
+                 struct netlink_ext_ack *extack)
 {
        struct hfsc_sched *q = qdisc_priv(sch);
        struct hfsc_class *cl = (struct hfsc_class *)arg;
index cd70dbc..dff3adf 100644 (file)
@@ -114,6 +114,7 @@ struct htb_class {
         * Written often fields
         */
        struct gnet_stats_basic_packed bstats;
+       struct gnet_stats_basic_packed bstats_bias;
        struct tc_htb_xstats    xstats; /* our special stats */
 
        /* token bucket parameters */
@@ -174,6 +175,11 @@ struct htb_sched {
        int                     row_mask[TC_HTB_MAXDEPTH];
 
        struct htb_level        hlevel[TC_HTB_MAXDEPTH];
+
+       struct Qdisc            **direct_qdiscs;
+       unsigned int            num_direct_qdiscs;
+
+       bool                    offload;
 };
 
 /* find class in global hash table using given handle */
@@ -957,7 +963,7 @@ static void htb_reset(struct Qdisc *sch)
                        if (cl->level)
                                memset(&cl->inner, 0, sizeof(cl->inner));
                        else {
-                               if (cl->leaf.q)
+                               if (cl->leaf.q && !q->offload)
                                        qdisc_reset(cl->leaf.q);
                        }
                        cl->prio_activity = 0;
@@ -980,6 +986,7 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
        [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
        [TCA_HTB_RATE64] = { .type = NLA_U64 },
        [TCA_HTB_CEIL64] = { .type = NLA_U64 },
+       [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
 };
 
 static void htb_work_func(struct work_struct *work)
@@ -992,12 +999,27 @@ static void htb_work_func(struct work_struct *work)
        rcu_read_unlock();
 }
 
+static void htb_set_lockdep_class_child(struct Qdisc *q)
+{
+       static struct lock_class_key child_key;
+
+       lockdep_set_class(qdisc_lock(q), &child_key);
+}
+
+static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
+{
+       return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
+}
+
 static int htb_init(struct Qdisc *sch, struct nlattr *opt,
                    struct netlink_ext_ack *extack)
 {
+       struct net_device *dev = qdisc_dev(sch);
+       struct tc_htb_qopt_offload offload_opt;
        struct htb_sched *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_HTB_MAX + 1];
        struct tc_htb_glob *gopt;
+       unsigned int ntx;
        int err;
 
        qdisc_watchdog_init(&q->watchdog, sch);
@@ -1022,9 +1044,26 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
        if (gopt->version != HTB_VER >> 16)
                return -EINVAL;
 
+       q->offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
+
+       if (q->offload) {
+               if (sch->parent != TC_H_ROOT)
+                       return -EOPNOTSUPP;
+
+               if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+                       return -EOPNOTSUPP;
+
+               q->num_direct_qdiscs = dev->real_num_tx_queues;
+               q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
+                                          sizeof(*q->direct_qdiscs),
+                                          GFP_KERNEL);
+               if (!q->direct_qdiscs)
+                       return -ENOMEM;
+       }
+
        err = qdisc_class_hash_init(&q->clhash);
        if (err < 0)
-               return err;
+               goto err_free_direct_qdiscs;
 
        qdisc_skb_head_init(&q->direct_queue);
 
@@ -1037,7 +1076,107 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
                q->rate2quantum = 1;
        q->defcls = gopt->defcls;
 
+       if (!q->offload)
+               return 0;
+
+       for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
+               struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
+               struct Qdisc *qdisc;
+
+               qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
+                                         TC_H_MAKE(sch->handle, 0), extack);
+               if (!qdisc) {
+                       err = -ENOMEM;
+                       goto err_free_qdiscs;
+               }
+
+               htb_set_lockdep_class_child(qdisc);
+               q->direct_qdiscs[ntx] = qdisc;
+               qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
+       }
+
+       sch->flags |= TCQ_F_MQROOT;
+
+       offload_opt = (struct tc_htb_qopt_offload) {
+               .command = TC_HTB_CREATE,
+               .parent_classid = TC_H_MAJ(sch->handle) >> 16,
+               .classid = TC_H_MIN(q->defcls),
+               .extack = extack,
+       };
+       err = htb_offload(dev, &offload_opt);
+       if (err)
+               goto err_free_qdiscs;
+
        return 0;
+
+err_free_qdiscs:
+       /* TC_HTB_CREATE call failed, avoid any further calls to the driver. */
+       q->offload = false;
+
+       for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx];
+            ntx++)
+               qdisc_put(q->direct_qdiscs[ntx]);
+
+       qdisc_class_hash_destroy(&q->clhash);
+       /* Prevent use-after-free and double-free when htb_destroy gets called.
+        */
+       q->clhash.hash = NULL;
+       q->clhash.hashsize = 0;
+
+err_free_direct_qdiscs:
+       kfree(q->direct_qdiscs);
+       q->direct_qdiscs = NULL;
+       return err;
+}
+
+static void htb_attach_offload(struct Qdisc *sch)
+{
+       struct net_device *dev = qdisc_dev(sch);
+       struct htb_sched *q = qdisc_priv(sch);
+       unsigned int ntx;
+
+       for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
+               struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
+
+               old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
+               qdisc_put(old);
+               qdisc_hash_add(qdisc, false);
+       }
+       for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
+               struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
+               struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
+
+               qdisc_put(old);
+       }
+
+       kfree(q->direct_qdiscs);
+       q->direct_qdiscs = NULL;
+}
+
+static void htb_attach_software(struct Qdisc *sch)
+{
+       struct net_device *dev = qdisc_dev(sch);
+       unsigned int ntx;
+
+       /* Resemble qdisc_graft behavior. */
+       for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+               struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
+               struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
+
+               qdisc_refcount_inc(sch);
+
+               qdisc_put(old);
+       }
+}
+
+static void htb_attach(struct Qdisc *sch)
+{
+       struct htb_sched *q = qdisc_priv(sch);
+
+       if (q->offload)
+               htb_attach_offload(sch);
+       else
+               htb_attach_software(sch);
 }
 
 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -1046,6 +1185,11 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
        struct nlattr *nest;
        struct tc_htb_glob gopt;
 
+       if (q->offload)
+               sch->flags |= TCQ_F_OFFLOADED;
+       else
+               sch->flags &= ~TCQ_F_OFFLOADED;
+
        sch->qstats.overlimits = q->overlimits;
        /* It's safe to not acquire the qdisc lock. As we hold RTNL,
         * no change can happen on the qdisc parameters.
@@ -1063,6 +1207,8 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
        if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
            nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
                goto nla_put_failure;
+       if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
+               goto nla_put_failure;
 
        return nla_nest_end(skb, nest);
 
@@ -1075,6 +1221,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
                          struct sk_buff *skb, struct tcmsg *tcm)
 {
        struct htb_class *cl = (struct htb_class *)arg;
+       struct htb_sched *q = qdisc_priv(sch);
        struct nlattr *nest;
        struct tc_htb_opt opt;
 
@@ -1101,6 +1248,8 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
        opt.level = cl->level;
        if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
                goto nla_put_failure;
+       if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
+               goto nla_put_failure;
        if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
            nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
                              TCA_HTB_PAD))
@@ -1117,10 +1266,39 @@ nla_put_failure:
        return -1;
 }
 
+static void htb_offload_aggregate_stats(struct htb_sched *q,
+                                       struct htb_class *cl)
+{
+       struct htb_class *c;
+       unsigned int i;
+
+       memset(&cl->bstats, 0, sizeof(cl->bstats));
+
+       for (i = 0; i < q->clhash.hashsize; i++) {
+               hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
+                       struct htb_class *p = c;
+
+                       while (p && p->level < cl->level)
+                               p = p->parent;
+
+                       if (p != cl)
+                               continue;
+
+                       cl->bstats.bytes += c->bstats_bias.bytes;
+                       cl->bstats.packets += c->bstats_bias.packets;
+                       if (c->level == 0) {
+                               cl->bstats.bytes += c->leaf.q->bstats.bytes;
+                               cl->bstats.packets += c->leaf.q->bstats.packets;
+                       }
+               }
+       }
+}
+
 static int
 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
 {
        struct htb_class *cl = (struct htb_class *)arg;
+       struct htb_sched *q = qdisc_priv(sch);
        struct gnet_stats_queue qs = {
                .drops = cl->drops,
                .overlimits = cl->overlimits,
@@ -1135,6 +1313,19 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
        cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
                                     INT_MIN, INT_MAX);
 
+       if (q->offload) {
+               if (!cl->level) {
+                       if (cl->leaf.q)
+                               cl->bstats = cl->leaf.q->bstats;
+                       else
+                               memset(&cl->bstats, 0, sizeof(cl->bstats));
+                       cl->bstats.bytes += cl->bstats_bias.bytes;
+                       cl->bstats.packets += cl->bstats_bias.packets;
+               } else {
+                       htb_offload_aggregate_stats(q, cl);
+               }
+       }
+
        if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
                                  d, NULL, &cl->bstats) < 0 ||
            gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
@@ -1144,19 +1335,97 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
        return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
 }
 
+static struct netdev_queue *
+htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
+{
+       struct net_device *dev = qdisc_dev(sch);
+       struct tc_htb_qopt_offload offload_opt;
+       int err;
+
+       offload_opt = (struct tc_htb_qopt_offload) {
+               .command = TC_HTB_LEAF_QUERY_QUEUE,
+               .classid = TC_H_MIN(tcm->tcm_parent),
+       };
+       err = htb_offload(dev, &offload_opt);
+       if (err || offload_opt.qid >= dev->num_tx_queues)
+               return NULL;
+       return netdev_get_tx_queue(dev, offload_opt.qid);
+}
+
+static struct Qdisc *
+htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
+{
+       struct net_device *dev = dev_queue->dev;
+       struct Qdisc *old_q;
+
+       if (dev->flags & IFF_UP)
+               dev_deactivate(dev);
+       old_q = dev_graft_qdisc(dev_queue, new_q);
+       if (new_q)
+               new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
+       if (dev->flags & IFF_UP)
+               dev_activate(dev);
+
+       return old_q;
+}
+
+static void htb_offload_move_qdisc(struct Qdisc *sch, u16 qid_old, u16 qid_new)
+{
+       struct netdev_queue *queue_old, *queue_new;
+       struct net_device *dev = qdisc_dev(sch);
+       struct Qdisc *qdisc;
+
+       queue_old = netdev_get_tx_queue(dev, qid_old);
+       queue_new = netdev_get_tx_queue(dev, qid_new);
+
+       if (dev->flags & IFF_UP)
+               dev_deactivate(dev);
+       qdisc = dev_graft_qdisc(queue_old, NULL);
+       qdisc->dev_queue = queue_new;
+       qdisc = dev_graft_qdisc(queue_new, qdisc);
+       if (dev->flags & IFF_UP)
+               dev_activate(dev);
+
+       WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
+}
+
 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                     struct Qdisc **old, struct netlink_ext_ack *extack)
 {
+       struct netdev_queue *dev_queue = sch->dev_queue;
        struct htb_class *cl = (struct htb_class *)arg;
+       struct htb_sched *q = qdisc_priv(sch);
+       struct Qdisc *old_q;
 
        if (cl->level)
                return -EINVAL;
-       if (new == NULL &&
-           (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
-                                    cl->common.classid, extack)) == NULL)
-               return -ENOBUFS;
+
+       if (q->offload) {
+               dev_queue = new->dev_queue;
+               WARN_ON(dev_queue != cl->leaf.q->dev_queue);
+       }
+
+       if (!new) {
+               new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
+                                       cl->common.classid, extack);
+               if (!new)
+                       return -ENOBUFS;
+       }
+
+       if (q->offload) {
+               htb_set_lockdep_class_child(new);
+               /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
+               qdisc_refcount_inc(new);
+               old_q = htb_graft_helper(dev_queue, new);
+       }
 
        *old = qdisc_replace(sch, new, &cl->leaf.q);
+
+       if (q->offload) {
+               WARN_ON(old_q != *old);
+               qdisc_put(old_q);
+       }
+
        return 0;
 }
 
@@ -1184,9 +1453,10 @@ static inline int htb_parent_last_child(struct htb_class *cl)
        return 1;
 }
 
-static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
+static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
                               struct Qdisc *new_q)
 {
+       struct htb_sched *q = qdisc_priv(sch);
        struct htb_class *parent = cl->parent;
 
        WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
@@ -1204,6 +1474,76 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
        parent->cmode = HTB_CAN_SEND;
 }
 
+static void htb_parent_to_leaf_offload(struct Qdisc *sch,
+                                      struct netdev_queue *dev_queue,
+                                      struct Qdisc *new_q)
+{
+       struct Qdisc *old_q;
+
+       /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
+       qdisc_refcount_inc(new_q);
+       old_q = htb_graft_helper(dev_queue, new_q);
+       WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
+}
+
+static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
+                                    bool last_child, bool destroying,
+                                    struct netlink_ext_ack *extack)
+{
+       struct tc_htb_qopt_offload offload_opt;
+       struct Qdisc *q = cl->leaf.q;
+       struct Qdisc *old = NULL;
+       int err;
+
+       if (cl->level)
+               return -EINVAL;
+
+       WARN_ON(!q);
+       if (!destroying) {
+               /* On destroy of HTB, two cases are possible:
+                * 1. q is a normal qdisc, but q->dev_queue has noop qdisc.
+                * 2. q is a noop qdisc (for nodes that were inner),
+                *    q->dev_queue is noop_netdev_queue.
+                */
+               old = htb_graft_helper(q->dev_queue, NULL);
+               WARN_ON(!old);
+               WARN_ON(old != q);
+       }
+
+       if (cl->parent) {
+               cl->parent->bstats_bias.bytes += q->bstats.bytes;
+               cl->parent->bstats_bias.packets += q->bstats.packets;
+       }
+
+       offload_opt = (struct tc_htb_qopt_offload) {
+               .command = !last_child ? TC_HTB_LEAF_DEL :
+                          destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
+                          TC_HTB_LEAF_DEL_LAST,
+               .classid = cl->common.classid,
+               .extack = extack,
+       };
+       err = htb_offload(qdisc_dev(sch), &offload_opt);
+
+       if (!err || destroying)
+               qdisc_put(old);
+       else
+               htb_graft_helper(q->dev_queue, old);
+
+       if (last_child)
+               return err;
+
+       if (!err && offload_opt.moved_qid != 0) {
+               if (destroying)
+                       q->dev_queue = netdev_get_tx_queue(qdisc_dev(sch),
+                                                          offload_opt.qid);
+               else
+                       htb_offload_move_qdisc(sch, offload_opt.moved_qid,
+                                              offload_opt.qid);
+       }
+
+       return err;
+}
+
 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 {
        if (!cl->level) {
@@ -1217,8 +1557,11 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 
 static void htb_destroy(struct Qdisc *sch)
 {
+       struct net_device *dev = qdisc_dev(sch);
+       struct tc_htb_qopt_offload offload_opt;
        struct htb_sched *q = qdisc_priv(sch);
        struct hlist_node *next;
+       bool nonempty, changed;
        struct htb_class *cl;
        unsigned int i;
 
@@ -1237,21 +1580,68 @@ static void htb_destroy(struct Qdisc *sch)
                        cl->block = NULL;
                }
        }
-       for (i = 0; i < q->clhash.hashsize; i++) {
-               hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
-                                         common.hnode)
-                       htb_destroy_class(sch, cl);
-       }
+
+       do {
+               nonempty = false;
+               changed = false;
+               for (i = 0; i < q->clhash.hashsize; i++) {
+                       hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
+                                                 common.hnode) {
+                               bool last_child;
+
+                               if (!q->offload) {
+                                       htb_destroy_class(sch, cl);
+                                       continue;
+                               }
+
+                               nonempty = true;
+
+                               if (cl->level)
+                                       continue;
+
+                               changed = true;
+
+                               last_child = htb_parent_last_child(cl);
+                               htb_destroy_class_offload(sch, cl, last_child,
+                                                         true, NULL);
+                               qdisc_class_hash_remove(&q->clhash,
+                                                       &cl->common);
+                               if (cl->parent)
+                                       cl->parent->children--;
+                               if (last_child)
+                                       htb_parent_to_leaf(sch, cl, NULL);
+                               htb_destroy_class(sch, cl);
+                       }
+               }
+       } while (changed);
+       WARN_ON(nonempty);
+
        qdisc_class_hash_destroy(&q->clhash);
        __qdisc_reset_queue(&q->direct_queue);
+
+       if (!q->offload)
+               return;
+
+       offload_opt = (struct tc_htb_qopt_offload) {
+               .command = TC_HTB_DESTROY,
+       };
+       htb_offload(dev, &offload_opt);
+
+       if (!q->direct_qdiscs)
+               return;
+       for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
+               qdisc_put(q->direct_qdiscs[i]);
+       kfree(q->direct_qdiscs);
 }
 
-static int htb_delete(struct Qdisc *sch, unsigned long arg)
+static int htb_delete(struct Qdisc *sch, unsigned long arg,
+                     struct netlink_ext_ack *extack)
 {
        struct htb_sched *q = qdisc_priv(sch);
        struct htb_class *cl = (struct htb_class *)arg;
        struct Qdisc *new_q = NULL;
        int last_child = 0;
+       int err;
 
        /* TODO: why don't allow to delete subtree ? references ? does
         * tc subsys guarantee us that in htb_destroy it holds no class
@@ -1260,11 +1650,28 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
        if (cl->children || cl->filter_cnt)
                return -EBUSY;
 
-       if (!cl->level && htb_parent_last_child(cl)) {
-               new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+       if (!cl->level && htb_parent_last_child(cl))
+               last_child = 1;
+
+       if (q->offload) {
+               err = htb_destroy_class_offload(sch, cl, last_child, false,
+                                               extack);
+               if (err)
+                       return err;
+       }
+
+       if (last_child) {
+               struct netdev_queue *dev_queue;
+
+               dev_queue = q->offload ? cl->leaf.q->dev_queue : sch->dev_queue;
+               new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
                                          cl->parent->common.classid,
                                          NULL);
-               last_child = 1;
+               if (q->offload) {
+                       if (new_q)
+                               htb_set_lockdep_class_child(new_q);
+                       htb_parent_to_leaf_offload(sch, dev_queue, new_q);
+               }
        }
 
        sch_tree_lock(sch);
@@ -1285,7 +1692,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
                                  &q->hlevel[cl->level].wait_pq);
 
        if (last_child)
-               htb_parent_to_leaf(q, cl, new_q);
+               htb_parent_to_leaf(sch, cl, new_q);
 
        sch_tree_unlock(sch);
 
@@ -1300,9 +1707,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
        int err = -EINVAL;
        struct htb_sched *q = qdisc_priv(sch);
        struct htb_class *cl = (struct htb_class *)*arg, *parent;
+       struct tc_htb_qopt_offload offload_opt;
        struct nlattr *opt = tca[TCA_OPTIONS];
        struct nlattr *tb[TCA_HTB_MAX + 1];
        struct Qdisc *parent_qdisc = NULL;
+       struct netdev_queue *dev_queue;
        struct tc_htb_opt *hopt;
        u64 rate64, ceil64;
        int warn = 0;
@@ -1335,8 +1744,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
                                              NULL));
 
+       rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
+       ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
+
        if (!cl) {              /* new class */
-               struct Qdisc *new_q;
+               struct net_device *dev = qdisc_dev(sch);
+               struct Qdisc *new_q, *old_q;
                int prio;
                struct {
                        struct nlattr           nla;
@@ -1379,11 +1792,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                                                NULL,
                                                qdisc_root_sleeping_running(sch),
                                                tca[TCA_RATE] ? : &est.nla);
-                       if (err) {
-                               tcf_block_put(cl->block);
-                               kfree(cl);
-                               goto failure;
-                       }
+                       if (err)
+                               goto err_block_put;
                }
 
                cl->children = 0;
@@ -1392,12 +1802,76 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
                        RB_CLEAR_NODE(&cl->node[prio]);
 
+               cl->common.classid = classid;
+
+               /* Make sure nothing interrupts us between the two
+                * ndo_setup_tc calls.
+                */
+               ASSERT_RTNL();
+
                /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
                 * so that can't be used inside of sch_tree_lock
                 * -- thanks to Karlis Peisenieks
                 */
-               new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+               if (!q->offload) {
+                       dev_queue = sch->dev_queue;
+               } else if (!(parent && !parent->level)) {
+                       /* Assign a dev_queue to this classid. */
+                       offload_opt = (struct tc_htb_qopt_offload) {
+                               .command = TC_HTB_LEAF_ALLOC_QUEUE,
+                               .classid = cl->common.classid,
+                               .parent_classid = parent ?
+                                       TC_H_MIN(parent->common.classid) :
+                                       TC_HTB_CLASSID_ROOT,
+                               .rate = max_t(u64, hopt->rate.rate, rate64),
+                               .ceil = max_t(u64, hopt->ceil.rate, ceil64),
+                               .extack = extack,
+                       };
+                       err = htb_offload(dev, &offload_opt);
+                       if (err) {
+                               pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
+                                      err);
+                               goto err_kill_estimator;
+                       }
+                       dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
+               } else { /* First child. */
+                       dev_queue = parent->leaf.q->dev_queue;
+                       old_q = htb_graft_helper(dev_queue, NULL);
+                       WARN_ON(old_q != parent->leaf.q);
+                       offload_opt = (struct tc_htb_qopt_offload) {
+                               .command = TC_HTB_LEAF_TO_INNER,
+                               .classid = cl->common.classid,
+                               .parent_classid =
+                                       TC_H_MIN(parent->common.classid),
+                               .rate = max_t(u64, hopt->rate.rate, rate64),
+                               .ceil = max_t(u64, hopt->ceil.rate, ceil64),
+                               .extack = extack,
+                       };
+                       err = htb_offload(dev, &offload_opt);
+                       if (err) {
+                               pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
+                                      err);
+                               htb_graft_helper(dev_queue, old_q);
+                               goto err_kill_estimator;
+                       }
+                       parent->bstats_bias.bytes += old_q->bstats.bytes;
+                       parent->bstats_bias.packets += old_q->bstats.packets;
+                       qdisc_put(old_q);
+               }
+               new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
                                          classid, NULL);
+               if (q->offload) {
+                       if (new_q) {
+                               htb_set_lockdep_class_child(new_q);
+                               /* One ref for cl->leaf.q, the other for
+                                * dev_queue->qdisc.
+                                */
+                               qdisc_refcount_inc(new_q);
+                       }
+                       old_q = htb_graft_helper(dev_queue, new_q);
+                       /* No qdisc_put needed. */
+                       WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
+               }
                sch_tree_lock(sch);
                if (parent && !parent->level) {
                        /* turn parent into inner node */
@@ -1415,10 +1889,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                                         : TC_HTB_MAXDEPTH) - 1;
                        memset(&parent->inner, 0, sizeof(parent->inner));
                }
+
                /* leaf (we) needs elementary qdisc */
                cl->leaf.q = new_q ? new_q : &noop_qdisc;
 
-               cl->common.classid = classid;
                cl->parent = parent;
 
                /* set class to be in HTB_CAN_SEND state */
@@ -1444,12 +1918,30 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                        if (err)
                                return err;
                }
-               sch_tree_lock(sch);
-       }
 
-       rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
+               if (q->offload) {
+                       struct net_device *dev = qdisc_dev(sch);
+
+                       offload_opt = (struct tc_htb_qopt_offload) {
+                               .command = TC_HTB_NODE_MODIFY,
+                               .classid = cl->common.classid,
+                               .rate = max_t(u64, hopt->rate.rate, rate64),
+                               .ceil = max_t(u64, hopt->ceil.rate, ceil64),
+                               .extack = extack,
+                       };
+                       err = htb_offload(dev, &offload_opt);
+                       if (err)
+                               /* Estimator was replaced, and rollback may fail
+                                * as well, so we don't try to recover it, and
+                                * the estimator won't work properly with the
+                                * offload anyway, because bstats are updated
+                                * only when the stats are queried.
+                                */
+                               return err;
+               }
 
-       ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
+               sch_tree_lock(sch);
+       }
 
        psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
        psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
@@ -1492,6 +1984,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
        *arg = (unsigned long)cl;
        return 0;
 
+err_kill_estimator:
+       gen_kill_estimator(&cl->rate_est);
+err_block_put:
+       tcf_block_put(cl->block);
+       kfree(cl);
 failure:
        return err;
 }
@@ -1557,6 +2054,7 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 }
 
 static const struct Qdisc_class_ops htb_class_ops = {
+       .select_queue   =       htb_select_queue,
        .graft          =       htb_graft,
        .leaf           =       htb_leaf,
        .qlen_notify    =       htb_qlen_notify,
@@ -1579,6 +2077,7 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
        .dequeue        =       htb_dequeue,
        .peek           =       qdisc_peek_dequeued,
        .init           =       htb_init,
+       .attach         =       htb_attach,
        .reset          =       htb_reset,
        .destroy        =       htb_destroy,
        .dump           =       htb_dump,
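
Beyond the qdisc itself, the series relies on drivers implementing the new TC_SETUP_QDISC_HTB type in their ndo_setup_tc() callback. Below is a hedged sketch of such a handler, assuming hypothetical foo_* hardware helpers; only the command values and the tc_htb_qopt_offload fields (classid, parent_classid, rate, ceil, qid, extack) are taken from the hunks above. Userspace would typically request the offload with something like: tc qdisc replace dev eth0 root handle 1: htb offload.

static int foo_setup_tc(struct net_device *dev, enum tc_setup_type type,
			void *type_data)
{
	struct tc_htb_qopt_offload *opt = type_data;

	if (type != TC_SETUP_QDISC_HTB)
		return -EOPNOTSUPP;

	switch (opt->command) {
	case TC_HTB_CREATE:
		/* Set up the hardware root; opt->classid carries defcls. */
		return foo_htb_root_create(dev, opt->extack);
	case TC_HTB_DESTROY:
		foo_htb_root_destroy(dev);
		return 0;
	case TC_HTB_LEAF_ALLOC_QUEUE:
		/* Create a HW leaf shaped to opt->rate/opt->ceil and report
		 * the TX queue it was bound to back through opt->qid.
		 */
		return foo_htb_leaf_alloc(dev, opt, &opt->qid);
	case TC_HTB_LEAF_QUERY_QUEUE:
		/* Fill opt->qid for the leaf identified by opt->classid. */
		return foo_htb_leaf_queue(dev, opt->classid, &opt->qid);
	case TC_HTB_LEAF_TO_INNER:
	case TC_HTB_LEAF_DEL:
	case TC_HTB_LEAF_DEL_LAST:
	case TC_HTB_LEAF_DEL_LAST_FORCE:
	case TC_HTB_NODE_MODIFY:
		/* ... remaining transitions elided in this sketch ... */
		return -EOPNOTSUPP;
	default:
		return -EOPNOTSUPP;
	}
}
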
index 6335230..1db9d4a 100644 (file)
@@ -529,7 +529,8 @@ static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
        kfree(cl);
 }
 
-static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
+static int qfq_delete_class(struct Qdisc *sch, unsigned long arg,
+                           struct netlink_ext_ack *extack)
 {
        struct qfq_sched *q = qdisc_priv(sch);
        struct qfq_class *cl = (struct qfq_class *)arg;
index da047a3..dde829d 100644 (file)
@@ -649,7 +649,8 @@ static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
        return -ENOSYS;
 }
 
-static int sfb_delete(struct Qdisc *sch, unsigned long cl)
+static int sfb_delete(struct Qdisc *sch, unsigned long cl,
+                     struct netlink_ext_ack *extack)
 {
        return -ENOSYS;
 }
index bd69a31..c5561d7 100644 (file)
@@ -3,4 +3,4 @@
 # Makefile for the Switch device API
 #
 
-obj-$(CONFIG_NET_SWITCHDEV) += switchdev.o
+obj-y += switchdev.o
index 2aca860..e926328 100644 (file)
@@ -117,10 +117,6 @@ struct sk_buff *tipc_msg_create(uint user, uint type,
        msg_set_origport(msg, oport);
        msg_set_destport(msg, dport);
        msg_set_errcode(msg, errcode);
-       if (hdr_sz > SHORT_H_SIZE) {
-               msg_set_orignode(msg, onode);
-               msg_set_destnode(msg, dnode);
-       }
        return buf;
 }
 
index 0d18b1d..5c903ab 100644 (file)
@@ -414,6 +414,7 @@ enum {
        TCA_HTB_RATE64,
        TCA_HTB_CEIL64,
        TCA_HTB_PAD,
+       TCA_HTB_OFFLOAD,
        __TCA_HTB_MAX,
 };
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
new file mode 100644 (file)
index 0000000..f813ffe
--- /dev/null
@@ -0,0 +1,64 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for physical ports resource. The test splits each splittable port
+# to its width and checks that eventually the number of physical ports equals
+# the maximum number of physical ports.
+
+PORT_NUM_NETIFS=0
+
+port_setup_prepare()
+{
+       :
+}
+
+port_cleanup()
+{
+       pre_cleanup
+
+       for port in "${unsplit[@]}"; do
+               devlink port unsplit $port
+               check_err $? "Did not unsplit $port"
+       done
+}
+
+split_all_ports()
+{
+       local should_fail=$1; shift
+       local -a unsplit
+
+       # Loop over the splittable netdevs and create tuples of netdev along
+       # with its width. For example:
+       # '$netdev1 $count1 $netdev2 $count2...', where:
+       # $netdev1-2 are splittable netdevs in the device, and
+       # $count1-2 are their widths, respectively.
+       while read netdev count <<<$(
+               devlink -j port show |
+               jq -r '.[][] | select(.splittable==true) | "\(.netdev) \(.lanes)"'
+               )
+               [[ ! -z $netdev ]]
+       do
+               devlink port split $netdev count $count
+               check_err $? "Did not split $netdev into $count"
+               unsplit+=( "${netdev}s0" )
+       done
+}
+
+port_test()
+{
+       local max_ports=$1; shift
+       local should_fail=$1; shift
+
+       split_all_ports $should_fail
+
+       occ=$(devlink -j resource show $DEVLINK_DEV \
+             | jq '.[][][] | select(.name=="physical_ports") |.["occ"]')
+
+       [[ $occ -eq $max_ports ]]
+       if [[ $should_fail -eq 0 ]]; then
+               check_err $? "Mismatched number of ports: expected $max_ports, got $occ."
+       else
+               check_err_fail $should_fail $? "Reached more ports than expected"
+       fi
+
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh
new file mode 100644 (file)
index 0000000..0b71dfb
--- /dev/null
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../port_scale.sh
+
+port_get_target()
+{
+       local should_fail=$1
+       local target
+
+       target=$(devlink_resource_size_get physical_ports)
+
+       if ((! should_fail)); then
+               echo $target
+       else
+               echo $((target + 1))
+       fi
+}
index d7cf33a..4a1c932 100755 (executable)
@@ -28,7 +28,7 @@ cleanup()
 
 trap cleanup EXIT
 
-ALL_TESTS="router tc_flower mirror_gre tc_police"
+ALL_TESTS="router tc_flower mirror_gre tc_police port"
 for current_test in ${TESTS:-$ALL_TESTS}; do
        source ${current_test}_scale.sh
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh
new file mode 100644 (file)
index 0000000..0b71dfb
--- /dev/null
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../port_scale.sh
+
+port_get_target()
+{
+       local should_fail=$1
+       local target
+
+       target=$(devlink_resource_size_get physical_ports)
+
+       if ((! should_fail)); then
+               echo $target
+       else
+               echo $((target + 1))
+       fi
+}
index 43f6624..087a884 100755 (executable)
@@ -22,7 +22,7 @@ cleanup()
 devlink_sp_read_kvd_defaults
 trap cleanup EXIT
 
-ALL_TESTS="router tc_flower mirror_gre tc_police"
+ALL_TESTS="router tc_flower mirror_gre tc_police port"
 for current_test in ${TESTS:-$ALL_TESTS}; do
        source ${current_test}_scale.sh
 
index fa5fa42..25f198b 100644 (file)
@@ -22,6 +22,7 @@ TEST_PROGS += devlink_port_split.py
 TEST_PROGS += drop_monitor_tests.sh
 TEST_PROGS += vrf_route_leaking.sh
 TEST_PROGS += bareudp.sh
+TEST_PROGS += unicast_extensions.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket nettest
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
index f74cd99..be34b9c 100755 (executable)
@@ -790,6 +790,81 @@ chk_join_nr "remove subflow and signal IPv6" 2 2 2
 chk_add_nr 1 1
 chk_rm_nr 1 1
 
+# subflow IPv4-mapped to IPv4-mapped
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl add "::ffff:10.0.3.2" flags subflow
+run_tests $ns1 $ns2 "::ffff:10.0.1.1"
+chk_join_nr "single subflow IPv4-mapped" 1 1 1
+
+# signal address IPv4-mapped with IPv4-mapped sk
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ip netns exec $ns1 ./pm_nl_ctl add "::ffff:10.0.2.1" flags signal
+run_tests $ns1 $ns2 "::ffff:10.0.1.1"
+chk_join_nr "signal address IPv4-mapped" 1 1 1
+chk_add_nr 1 1
+
+# subflow v4-map-v6
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_tests $ns1 $ns2 "::ffff:10.0.1.1"
+chk_join_nr "single subflow v4-map-v6" 1 1 1
+
+# signal address v4-map-v6
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+run_tests $ns1 $ns2 "::ffff:10.0.1.1"
+chk_join_nr "signal address v4-map-v6" 1 1 1
+chk_add_nr 1 1
+
+# subflow v6-map-v4
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl add "::ffff:10.0.3.2" flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "single subflow v6-map-v4" 1 1 1
+
+# signal address v6-map-v4
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ip netns exec $ns1 ./pm_nl_ctl add "::ffff:10.0.2.1" flags signal
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "signal address v6-map-v4" 1 1 1
+chk_add_nr 1 1
+
+# no subflow IPv6 to v4 address
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl add dead:beef:2::2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "no JOIN with diff families v4-v6" 0 0 0
+
+# no subflow IPv6 to v4 address even if v6 has a valid v4 at the end
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl add dead:beef:2::10.0.3.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "no JOIN with diff families v4-v6-2" 0 0 0
+
+# no subflow IPv4 to v6 address, no need to slow down too then
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_tests $ns1 $ns2 dead:beef:1::1
+chk_join_nr "no JOIN with diff families v6-v4" 0 0 0
+
 # single subflow, backup
 reset
 ip netns exec $ns1 ./pm_nl_ctl limits 0 1
diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh
new file mode 100755 (executable)
index 0000000..dbf0421
--- /dev/null
@@ -0,0 +1,228 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# By Seth Schoen (c) 2021, for the IPv4 Unicast Extensions Project
+# Thanks to David Ahern for help and advice on nettest modifications.
+#
+# Self-tests for IPv4 address extensions: the kernel's ability to accept
+# certain traditionally unused or unallocated IPv4 addresses. For each kind
+# of address, we test for interface assignment, ping, TCP, and forwarding.
+# Must be run as root (to manipulate network namespaces and virtual
+# interfaces).
+#
+# Things we test for here:
+#
+# * Currently the kernel accepts addresses in 0/8 and 240/4 as valid.
+#
+# * Notwithstanding that, 0.0.0.0 and 255.255.255.255 cannot be assigned.
+#
+# * Currently the kernel DOES NOT accept unicast use of the lowest
+#   address in an IPv4 subnet (e.g. 192.168.100.0/32 in 192.168.100.0/24).
+#   This is treated as a second broadcast address, for compatibility
+#   with 4.2BSD (!).
+#
+# * Currently the kernel DOES NOT accept unicast use of any of 127/8.
+#
+# * Currently the kernel DOES NOT accept unicast use of any of 224/4.
+#
+# These tests provide an easy way to flip the expected result of any
+# of these behaviors for testing kernel patches that change them.
+
+# nettest can be run from PATH or from same directory as this selftest
+if ! which nettest >/dev/null; then
+       PATH=$PWD:$PATH
+       if ! which nettest >/dev/null; then
+               echo "'nettest' command not found; skipping tests"
+               exit 0
+       fi
+fi
+
+result=0
+
+hide_output(){ exec 3>&1 4>&2 >/dev/null 2>/dev/null; }
+show_output(){ exec >&3 2>&4; }
+
+show_result(){
+       if [ $1 -eq 0 ]; then
+               printf "TEST: %-60s  [ OK ]\n" "${2}"
+       else
+               printf "TEST: %-60s  [FAIL]\n" "${2}"
+               result=1
+       fi
+}
+
+_do_segmenttest(){
+       # Perform a simple set of link tests between a pair of
+       # IP addresses on a shared (virtual) segment, using
+       # ping and nettest.
+       # foo --- bar
+       # Arguments: ip_a ip_b prefix_length test_description
+       #
+       # Caller must set up foo-ns and bar-ns namespaces
+       # containing linked veth devices foo and bar,
+       # respectively.
+
+       ip -n foo-ns address add $1/$3 dev foo || return 1
+       ip -n foo-ns link set foo up || return 1
+       ip -n bar-ns address add $2/$3 dev bar || return 1
+       ip -n bar-ns link set bar up || return 1
+
+       ip netns exec foo-ns timeout 2 ping -c 1 $2 || return 1
+       ip netns exec bar-ns timeout 2 ping -c 1 $1 || return 1
+
+       nettest -B -N bar-ns -O foo-ns -r $1 || return 1
+       nettest -B -N foo-ns -O bar-ns -r $2 || return 1
+
+       return 0
+}
+
+_do_route_test(){
+       # Perform a simple set of gateway tests.
+       #
+       # [foo] <---> [foo1]-[bar1] <---> [bar]   /prefix
+       #  host          gateway          host
+       #
+       # Arguments: foo_ip foo1_ip bar1_ip bar_ip prefix_len test_description
+       # Displays test result and returns success or failure.
+
+       # Caller must set up foo-ns, bar-ns, and router-ns
+       # containing linked veth devices foo-foo1, bar1-bar
+       # (foo in foo-ns, foo1 and bar1 in router-ns, and
+       # bar in bar-ns).
+
+       ip -n foo-ns address add $1/$5 dev foo || return 1
+       ip -n foo-ns link set foo up || return 1
+       ip -n foo-ns route add default via $2 || return 1
+       ip -n bar-ns address add $4/$5 dev bar || return 1
+       ip -n bar-ns link set bar up || return 1
+       ip -n bar-ns route add default via $3 || return 1
+       ip -n router-ns address add $2/$5 dev foo1 || return 1
+       ip -n router-ns link set foo1 up || return 1
+       ip -n router-ns address add $3/$5 dev bar1 || return 1
+       ip -n router-ns link set bar1 up || return 1
+
+       echo 1 | ip netns exec router-ns tee /proc/sys/net/ipv4/ip_forward
+
+       ip netns exec foo-ns timeout 2 ping -c 1 $2 || return 1
+       ip netns exec foo-ns timeout 2 ping -c 1 $4 || return 1
+       ip netns exec bar-ns timeout 2 ping -c 1 $3 || return 1
+       ip netns exec bar-ns timeout 2 ping -c 1 $1 || return 1
+
+       nettest -B -N bar-ns -O foo-ns -r $1 || return 1
+       nettest -B -N foo-ns -O bar-ns -r $4 || return 1
+
+       return 0
+}
+
+segmenttest(){
+       # Sets up veth link and tries to connect over it.
+       # Arguments: ip_a ip_b prefix_len test_description
+       hide_output
+       ip netns add foo-ns
+       ip netns add bar-ns
+       ip link add foo netns foo-ns type veth peer name bar netns bar-ns
+
+       test_result=0
+       _do_segmenttest "$@" || test_result=1
+
+       ip netns pids foo-ns | xargs -r kill -9
+       ip netns pids bar-ns | xargs -r kill -9
+       ip netns del foo-ns
+       ip netns del bar-ns
+       show_output
+
+       # inverted tests will expect failure instead of success
+       [ -n "$expect_failure" ] && test_result=`expr 1 - $test_result`
+
+       show_result $test_result "$4"
+}
+
+route_test(){
+       # Sets up a simple gateway and tries to connect through it.
+       # [foo] <---> [foo1]-[bar1] <---> [bar]   /prefix
+       # Arguments: foo_ip foo1_ip bar1_ip bar_ip prefix_len test_description
+       # Returns success or failure.
+
+       hide_output
+       ip netns add foo-ns
+       ip netns add bar-ns
+       ip netns add router-ns
+       ip link add foo netns foo-ns type veth peer name foo1 netns router-ns
+       ip link add bar netns bar-ns type veth peer name bar1 netns router-ns
+
+       test_result=0
+       _do_route_test "$@" || test_result=1
+
+       ip netns pids foo-ns | xargs -r kill -9
+       ip netns pids bar-ns | xargs -r kill -9
+       ip netns pids router-ns | xargs -r kill -9
+       ip netns del foo-ns
+       ip netns del bar-ns
+       ip netns del router-ns
+
+       show_output
+
+       # inverted tests will expect failure instead of success
+       [ -n "$expect_failure" ] && test_result=`expr 1 - $test_result`
+       show_result $test_result "$6"
+}
+
+echo "###########################################################################"
+echo "Unicast address extensions tests (behavior of reserved IPv4 addresses)"
+echo "###########################################################################"
+#
+# Test support for 240/4
+segmenttest 240.1.2.1   240.1.2.4    24 "assign and ping within 240/4 (1 of 2) (is allowed)"
+segmenttest 250.100.2.1 250.100.30.4 16 "assign and ping within 240/4 (2 of 2) (is allowed)"
+#
+# Test support for 0/8
+segmenttest 0.1.2.17    0.1.2.23  24 "assign and ping within 0/8 (1 of 2) (is allowed)"
+segmenttest 0.77.240.17 0.77.2.23 16 "assign and ping within 0/8 (2 of 2) (is allowed)"
+#
+# Even 255.255/16 is OK!
+segmenttest 255.255.3.1 255.255.50.77 16 "assign and ping inside 255.255/16 (is allowed)"
+#
+# Or 255.255.255/24
+segmenttest 255.255.255.1 255.255.255.254 24 "assign and ping inside 255.255.255/24 (is allowed)"
+#
+# Routing between different networks
+route_test 240.5.6.7 240.5.6.1  255.1.2.1    255.1.2.3      24 "route between 240.5.6/24 and 255.1.2/24 (is allowed)"
+route_test 0.200.6.7 0.200.38.1 245.99.101.1 245.99.200.111 16 "route between 0.200/16 and 245.99/16 (is allowed)"
+#
+# ==============================================
+# ==== TESTS THAT CURRENTLY EXPECT FAILURE =====
+# ==============================================
+expect_failure=true
+# It should still not be possible to use 0.0.0.0 or 255.255.255.255
+# as a unicast address.  Thus, these tests expect failure.
+segmenttest 0.0.1.5       0.0.0.0         16 "assigning 0.0.0.0 (is forbidden)"
+segmenttest 255.255.255.1 255.255.255.255 16 "assigning 255.255.255.255 (is forbidden)"
+#
+# Test support for not having all of 127 be loopback
+# Currently Linux does not allow this, so this should fail too
+segmenttest 127.99.4.5 127.99.4.6 16 "assign and ping inside 127/8 (is forbidden)"
+#
+# Test support for lowest address
+# Currently Linux does not allow this, so this should fail too
+segmenttest 5.10.15.20 5.10.15.0 24 "assign and ping lowest address (is forbidden)"
+#
+# Routing using lowest address as a gateway/endpoint
+# Currently Linux does not allow this, so this should fail too
+route_test 192.168.42.1 192.168.42.0 9.8.7.6 9.8.7.0 24 "routing using lowest address (is forbidden)"
+#
+# Test support for unicast use of class D
+# Currently Linux does not allow this, so this should fail too
+segmenttest 225.1.2.3 225.1.2.200 24 "assign and ping class D address (is forbidden)"
+#
+# Routing using class D as a gateway
+route_test 225.1.42.1 225.1.42.2 9.8.7.6 9.8.7.1 24 "routing using class D (is forbidden)"
+#
+# Routing using 127/8
+# Currently Linux does not allow this, so this should fail too
+route_test 127.99.2.3 127.99.2.4 200.1.2.3 200.1.2.4 24 "routing using 127/8 (is forbidden)"
+#
+unset expect_failure
+# =====================================================
+# ==== END OF TESTS THAT CURRENTLY EXPECT FAILURE =====
+# =====================================================
+exit ${result}